/* adapted from official xz source: xz/src/xz/list.c */ static lzma_index* read_index(File file, uint64_t memlimit, std::string &error) { union { unsigned char buf[4096]; unsigned char u8[LZMA_STREAM_HEADER_SIZE]; uint32_t u32[LZMA_STREAM_HEADER_SIZE/4]; } buf; lzma_stream strm = LZMA_STREAM_INIT; lzma_index *cur_index = nullptr; lzma_index *col_index = nullptr; FileReaderState *filestate = nullptr; lzma_stream_flags header_flags; lzma_stream_flags footer_flags; lzma_ret ret; // Current position in the file. We parse the file backwards so // initialize it to point to the end of the file. int64_t pos = file->filesize();; // Each loop iteration decodes one Index. do { lzma_vli stream_padding, index_size; uint64_t memused; // Check that there is enough data left to contain at least // the Stream Header and Stream Footer. This check cannot // fail in the first pass of this loop. if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { error.assign("file too small for xz archive"); goto failed; } pos -= LZMA_STREAM_HEADER_SIZE; stream_padding = 0; // Locate the Stream Footer. There may be Stream Padding which // we must skip when reading backwards. for (;;) { if (pos < LZMA_STREAM_HEADER_SIZE) { error.assign("file too small for xz archive"); goto failed; } if (!file->readInto(filestate, pos, LZMA_STREAM_HEADER_SIZE, buf.buf, error)) goto failed; /* padding must be a multiple of 4 */ if (buf.u32[2] != 0) break; pos -= 4; stream_padding += 4; if (buf.u32[1] != 0) break; pos -= 4; stream_padding += 4; if (buf.u32[0] != 0) break; pos -= 4; stream_padding += 4; } // Decode the Stream Footer. ret = lzma_stream_footer_decode(&footer_flags, buf.u8); if (LZMA_OK != ret) { errnoLzmaToStr("invalid footer", ret, error); goto failed; } // Check that the size of the Index field looks sane. index_size = footer_flags.backward_size; if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { error.assign("invalid index size"); goto failed; } // Set pos to the beginning of the Index. pos -= index_size; // See how much memory we can use for decoding this Index. memused = nullptr != col_index ? lzma_index_memused(col_index) : 0; if (memused > memlimit) { error.assign("mem limit hit"); goto failed; } // Decode the Index. ret = lzma_index_decoder(&strm, &cur_index, memlimit - memused); if (ret != LZMA_OK) { errnoLzmaToStr("couldn't allocate new index", ret, error); goto failed; } do { ssize_t want = (index_size < sizeof(buf.buf) ? index_size : sizeof(buf.buf)); if (want < 0) { ret = LZMA_DATA_ERROR; break; } if (!file->readInto(filestate, pos, want, buf.buf, error)) goto failed; strm.avail_in = want; strm.next_in = buf.buf; pos += want; index_size -= want; ret = lzma_code(&strm, LZMA_RUN); } while (ret == LZMA_OK); // If the decoding seems to be successful, check also that // the Index decoder consumed as much input as indicated // by the Backward Size field. if (ret == LZMA_STREAM_END) if (index_size != 0 || strm.avail_in != 0) ret = LZMA_DATA_ERROR; if (ret != LZMA_STREAM_END) { // LZMA_BUFFER_ERROR means that the Index decoder // would have liked more input than what the Index // size should be according to Stream Footer. // The message for LZMA_DATA_ERROR makes more // sense in that case. if (ret == LZMA_BUF_ERROR) ret = LZMA_DATA_ERROR; errnoLzmaToStr("decoding index failed", ret, error); goto failed; } // Decode the Stream Header and check that its Stream Flags // match the Stream Footer. pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE; if ((lzma_vli)(pos) < lzma_index_total_size(cur_index)) { error.assign("invalid archive - index large than available data"); goto failed; } pos -= lzma_index_total_size(cur_index); if (!file->readInto(filestate, pos, LZMA_STREAM_HEADER_SIZE, buf.buf, error)) goto failed; ret = lzma_stream_header_decode(&header_flags, buf.u8); if (ret != LZMA_OK) { errnoLzmaToStr("invalid header", ret, error); goto failed; } ret = lzma_stream_flags_compare(&header_flags, &footer_flags); if (ret != LZMA_OK) { errnoLzmaToStr("invalid stream: footer doesn't match header", ret, error); goto failed; } // Store the decoded Stream Flags into this_index. This is // needed so that we can print which Check is used in each // Stream. ret = lzma_index_stream_flags(cur_index, &footer_flags); if (ret != LZMA_OK) { errnoLzmaToStr("decoding stream flags failed", ret, error); goto failed; } // Store also the size of the Stream Padding field. It is // needed to show the offsets of the Streams correctly. ret = lzma_index_stream_padding(cur_index, stream_padding); if (ret != LZMA_OK) { errnoLzmaToStr("storing stream padding failed", ret, error); goto failed; } if (nullptr != col_index) { // Append the earlier decoded Indexes // after this_index. ret = lzma_index_cat(cur_index, col_index, NULL); col_index = nullptr; if (ret != LZMA_OK) { errnoLzmaToStr("failed to concatenate indexes", ret, error); goto failed; } } col_index = cur_index; cur_index = nullptr; } while (pos > 0); lzma_end(&strm); file->finish(filestate); return col_index; failed: lzma_end(&strm); if (nullptr != cur_index) lzma_index_end(cur_index, NULL); if (nullptr != col_index) lzma_index_end(col_index, NULL); file->finish(filestate); return nullptr; }
/// \brief Parse the Index(es) from the given .xz file /// /// \param xfi Pointer to structure where the decoded information /// is stored. /// \param pair Input file /// /// \return On success, false is returned. On error, true is returned. /// // TODO: This function is pretty big. liblzma should have a function that // takes a callback function to parse the Index(es) from a .xz file to make // it easy for applications. static bool parse_indexes(xz_file_info *xfi, int src_fd) { struct stat st; fstat(src_fd, &st); if (st.st_size <= 0) { return true; } if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { return true; } io_buf buf; lzma_stream_flags header_flags; lzma_stream_flags footer_flags; lzma_ret ret; // lzma_stream for the Index decoder lzma_stream strm = LZMA_STREAM_INIT; // All Indexes decoded so far lzma_index *combined_index = NULL; // The Index currently being decoded lzma_index *this_index = NULL; // Current position in the file. We parse the file backwards so // initialize it to point to the end of the file. off_t pos = st.st_size; // Each loop iteration decodes one Index. do { // Check that there is enough data left to contain at least // the Stream Header and Stream Footer. This check cannot // fail in the first pass of this loop. if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { goto error; } pos -= LZMA_STREAM_HEADER_SIZE; lzma_vli stream_padding = 0; // Locate the Stream Footer. There may be Stream Padding which // we must skip when reading backwards. while (true) { if (pos < LZMA_STREAM_HEADER_SIZE) { goto error; } if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos)) goto error; // Stream Padding is always a multiple of four bytes. int i = 2; if (buf.u32[i] != 0) break; // To avoid calling io_pread() for every four bytes // of Stream Padding, take advantage that we read // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and // check them too before calling io_pread() again. do { stream_padding += 4; pos -= 4; --i; } while (i >= 0 && buf.u32[i] == 0); } // Decode the Stream Footer. ret = lzma_stream_footer_decode(&footer_flags, buf.u8); if (ret != LZMA_OK) { goto error; } // Check that the Stream Footer doesn't specify something // that we don't support. This can only happen if the xz // version is older than liblzma and liblzma supports // something new. // // It is enough to check Stream Footer. Stream Header must // match when it is compared against Stream Footer with // lzma_stream_flags_compare(). if (footer_flags.version != 0) { goto error; } // Check that the size of the Index field looks sane. lzma_vli index_size = footer_flags.backward_size; if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { goto error; } // Set pos to the beginning of the Index. pos -= index_size; // Decode the Index. ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX); if (ret != LZMA_OK) { goto error; } do { // Don't give the decoder more input than the // Index size. strm.avail_in = my_min(IO_BUFFER_SIZE, index_size); if (io_pread(src_fd, &buf, strm.avail_in, pos)) goto error; pos += strm.avail_in; index_size -= strm.avail_in; strm.next_in = buf.u8; ret = lzma_code(&strm, LZMA_RUN); } while (ret == LZMA_OK); // If the decoding seems to be successful, check also that // the Index decoder consumed as much input as indicated // by the Backward Size field. if (ret == LZMA_STREAM_END) if (index_size != 0 || strm.avail_in != 0) ret = LZMA_DATA_ERROR; if (ret != LZMA_STREAM_END) { // LZMA_BUFFER_ERROR means that the Index decoder // would have liked more input than what the Index // size should be according to Stream Footer. // The message for LZMA_DATA_ERROR makes more // sense in that case. if (ret == LZMA_BUF_ERROR) ret = LZMA_DATA_ERROR; goto error; } // Decode the Stream Header and check that its Stream Flags // match the Stream Footer. pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE; if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) { goto error; } pos -= lzma_index_total_size(this_index); if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos)) goto error; ret = lzma_stream_header_decode(&header_flags, buf.u8); if (ret != LZMA_OK) { goto error; } ret = lzma_stream_flags_compare(&header_flags, &footer_flags); if (ret != LZMA_OK) { goto error; } // Store the decoded Stream Flags into this_index. This is // needed so that we can print which Check is used in each // Stream. ret = lzma_index_stream_flags(this_index, &footer_flags); if (ret != LZMA_OK) goto error; // Store also the size of the Stream Padding field. It is // needed to show the offsets of the Streams correctly. ret = lzma_index_stream_padding(this_index, stream_padding); if (ret != LZMA_OK) goto error; if (combined_index != NULL) { // Append the earlier decoded Indexes // after this_index. ret = lzma_index_cat( this_index, combined_index, NULL); if (ret != LZMA_OK) { goto error; } } combined_index = this_index; this_index = NULL; xfi->stream_padding += stream_padding; } while (pos > 0); lzma_end(&strm); // All OK. Make combined_index available to the caller. xfi->idx = combined_index; return false; error: // Something went wrong, free the allocated memory. lzma_end(&strm); lzma_index_end(combined_index, NULL); lzma_index_end(this_index, NULL); return true; }
/// \brief Parse the Index(es) from the given .xz file /// /// \param xfi Pointer to structure where the decoded information /// is stored. /// \param pair Input file /// /// \return On success, false is returned. On error, true is returned. /// // TODO: This function is pretty big. liblzma should have a function that // takes a callback function to parse the Index(es) from a .xz file to make // it easy for applications. static bool parse_indexes(xz_file_info *xfi, file_pair *pair) { if (pair->src_st.st_size <= 0) { message_error(_("%s: File is empty"), pair->src_name); return true; } if (pair->src_st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { message_error(_("%s: Too small to be a valid .xz file"), pair->src_name); return true; } io_buf buf; lzma_stream_flags header_flags; lzma_stream_flags footer_flags; lzma_ret ret; // lzma_stream for the Index decoder lzma_stream strm = LZMA_STREAM_INIT; // All Indexes decoded so far lzma_index *combined_index = NULL; // The Index currently being decoded lzma_index *this_index = NULL; // Current position in the file. We parse the file backwards so // initialize it to point to the end of the file. off_t pos = pair->src_st.st_size; // Each loop iteration decodes one Index. do { // Check that there is enough data left to contain at least // the Stream Header and Stream Footer. This check cannot // fail in the first pass of this loop. if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { message_error("%s: %s", pair->src_name, message_strm(LZMA_DATA_ERROR)); goto error; } pos -= LZMA_STREAM_HEADER_SIZE; lzma_vli stream_padding = 0; // Locate the Stream Footer. There may be Stream Padding which // we must skip when reading backwards. while (true) { if (pos < LZMA_STREAM_HEADER_SIZE) { message_error("%s: %s", pair->src_name, message_strm( LZMA_DATA_ERROR)); goto error; } if (io_pread(pair, &buf, LZMA_STREAM_HEADER_SIZE, pos)) goto error; // Stream Padding is always a multiple of four bytes. int i = 2; if (buf.u32[i] != 0) break; // To avoid calling io_pread() for every four bytes // of Stream Padding, take advantage that we read // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and // check them too before calling io_pread() again. do { stream_padding += 4; pos -= 4; --i; } while (i >= 0 && buf.u32[i] == 0); } // Decode the Stream Footer. ret = lzma_stream_footer_decode(&footer_flags, buf.u8); if (ret != LZMA_OK) { message_error("%s: %s", pair->src_name, message_strm(ret)); goto error; } // Check that the size of the Index field looks sane. lzma_vli index_size = footer_flags.backward_size; if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { message_error("%s: %s", pair->src_name, message_strm(LZMA_DATA_ERROR)); goto error; } // Set pos to the beginning of the Index. pos -= index_size; // See how much memory we can use for decoding this Index. uint64_t memlimit = hardware_memlimit_get(MODE_LIST); uint64_t memused = 0; if (combined_index != NULL) { memused = lzma_index_memused(combined_index); if (memused > memlimit) message_bug(); memlimit -= memused; } // Decode the Index. ret = lzma_index_decoder(&strm, &this_index, memlimit); if (ret != LZMA_OK) { message_error("%s: %s", pair->src_name, message_strm(ret)); goto error; } do { // Don't give the decoder more input than the // Index size. strm.avail_in = my_min(IO_BUFFER_SIZE, index_size); if (io_pread(pair, &buf, strm.avail_in, pos)) goto error; pos += strm.avail_in; index_size -= strm.avail_in; strm.next_in = buf.u8; ret = lzma_code(&strm, LZMA_RUN); } while (ret == LZMA_OK); // If the decoding seems to be successful, check also that // the Index decoder consumed as much input as indicated // by the Backward Size field. if (ret == LZMA_STREAM_END) if (index_size != 0 || strm.avail_in != 0) ret = LZMA_DATA_ERROR; if (ret != LZMA_STREAM_END) { // LZMA_BUFFER_ERROR means that the Index decoder // would have liked more input than what the Index // size should be according to Stream Footer. // The message for LZMA_DATA_ERROR makes more // sense in that case. if (ret == LZMA_BUF_ERROR) ret = LZMA_DATA_ERROR; message_error("%s: %s", pair->src_name, message_strm(ret)); // If the error was too low memory usage limit, // show also how much memory would have been needed. if (ret == LZMA_MEMLIMIT_ERROR) { uint64_t needed = lzma_memusage(&strm); if (UINT64_MAX - needed < memused) needed = UINT64_MAX; else needed += memused; message_mem_needed(V_ERROR, needed); } goto error; } // Decode the Stream Header and check that its Stream Flags // match the Stream Footer. pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE; if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) { message_error("%s: %s", pair->src_name, message_strm(LZMA_DATA_ERROR)); goto error; } pos -= lzma_index_total_size(this_index); if (io_pread(pair, &buf, LZMA_STREAM_HEADER_SIZE, pos)) goto error; ret = lzma_stream_header_decode(&header_flags, buf.u8); if (ret != LZMA_OK) { message_error("%s: %s", pair->src_name, message_strm(ret)); goto error; } ret = lzma_stream_flags_compare(&header_flags, &footer_flags); if (ret != LZMA_OK) { message_error("%s: %s", pair->src_name, message_strm(ret)); goto error; } // Store the decoded Stream Flags into this_index. This is // needed so that we can print which Check is used in each // Stream. ret = lzma_index_stream_flags(this_index, &footer_flags); if (ret != LZMA_OK) message_bug(); // Store also the size of the Stream Padding field. It is // needed to show the offsets of the Streams correctly. ret = lzma_index_stream_padding(this_index, stream_padding); if (ret != LZMA_OK) message_bug(); if (combined_index != NULL) { // Append the earlier decoded Indexes // after this_index. ret = lzma_index_cat( this_index, combined_index, NULL); if (ret != LZMA_OK) { message_error("%s: %s", pair->src_name, message_strm(ret)); goto error; } } combined_index = this_index; this_index = NULL; xfi->stream_padding += stream_padding; } while (pos > 0); lzma_end(&strm); // All OK. Make combined_index available to the caller. xfi->idx = combined_index; return false; error: // Something went wrong, free the allocated memory. lzma_end(&strm); lzma_index_end(combined_index, NULL); lzma_index_end(this_index, NULL); return true; }