/*
 * Locates the End of Central Directory record by scanning backwards from
 * the end of the stream in windows of up to 512 bytes.
 *
 * The search gives up once more than UINT16_MAX + ZIP_END_OF_CENTRAL_DIR_SIZE
 * bytes from the end have been covered or the start of the stream is reached.
 *
 * Returns the absolute offset of the record's signature, or -1 when no
 * signature is found or a seek/read fails.
 */
off64_t zip_find_end_of_central_directory(ar_stream *stream)
{
    uint8_t window[512];
    off64_t total;
    int back = 0;
    int chunk, pos;

    if (!ar_seek(stream, 0, SEEK_END))
        return -1;
    total = ar_tell(stream);

    while (back < UINT16_MAX + ZIP_END_OF_CENTRAL_DIR_SIZE && back < total) {
        /* read a full window unless fewer bytes remain before the start */
        if (total - back < sizeof(window))
            chunk = (int)(total - back);
        else
            chunk = (int)sizeof(window);
        back += chunk;

        /* a window smaller than the record can't contain it */
        if (chunk < ZIP_END_OF_CENTRAL_DIR_SIZE)
            return -1;
        if (!ar_seek(stream, -back, SEEK_END) ||
            ar_read(stream, window, chunk) != (size_t)chunk)
            return -1;

        /* prefer the match closest to the end of the stream */
        pos = chunk - ZIP_END_OF_CENTRAL_DIR_SIZE;
        while (pos >= 0) {
            if (uint32le(window + pos) == SIG_END_OF_CENTRAL_DIRECTORY)
                return total - back + pos;
            pos--;
        }

        /* overlap consecutive windows so a record straddling them is seen */
        back -= ZIP_END_OF_CENTRAL_DIR_SIZE - 1;
    }

    return -1;
}
/*
 * Returns the current entry's name with '\\' separators replaced by '/'.
 *
 * The name is read lazily and cached in zip->entry.name. It is read from
 * the central directory entry when zip->dir.end_offset >= 0 and from the
 * local file header otherwise. When bit 11 of the entry flags is set the
 * bytes are used as read; otherwise they are passed through
 * ar_conv_dos_to_utf8().
 *
 * Returns NULL if seeking, parsing, allocating or reading fails.
 */
const char *zip_get_name(ar_archive *ar)
{
    ar_archive_zip *zip = (ar_archive_zip *)ar;
    struct zip_entry header;
    char *raw;
    char *cursor;

    if (zip->entry.name)
        return zip->entry.name;

    if (zip->dir.end_offset < 0) {
        /* no central directory: take the name from the local file header */
        if (!ar_seek(ar->stream, zip->entry.offset, SEEK_SET))
            return NULL;
        if (!zip_parse_local_file_entry(zip, &header))
            return NULL;
        /* NOTE(review): the header is parsed at zip->entry.offset but the
         * name is read relative to ar->entry_offset - presumably both are
         * equal in this mode; confirm. */
        if (!ar_seek(ar->stream, ar->entry_offset + ZIP_LOCAL_ENTRY_FIXED_SIZE, SEEK_SET))
            return NULL;
    }
    else {
        if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET))
            return NULL;
        if (!zip_parse_directory_entry(zip, &header))
            return NULL;
        if (!ar_seek(ar->stream, ar->entry_offset + ZIP_DIR_ENTRY_FIXED_SIZE, SEEK_SET))
            return NULL;
    }

    /* the name immediately follows the fixed-size part of the header */
    raw = malloc(header.namelen + 1);
    if (!raw || ar_read(ar->stream, raw, header.namelen) != header.namelen) {
        free(raw);
        return NULL;
    }
    raw[header.namelen] = '\0';

    if ((header.flags & (1 << 11))) {
        zip->entry.name = raw;
    }
    else {
        zip->entry.name = ar_conv_dos_to_utf8(raw);
        free(raw);
    }

    /* normalize path separators */
    if (zip->entry.name) {
        for (cursor = zip->entry.name; *cursor != '\0'; cursor++) {
            if (*cursor == '\\')
                *cursor = '/';
        }
    }

    return zip->entry.name;
}
/*
 * Opens the given stream as a 7z archive.
 *
 * Rewinds the stream, allocates the archive object through
 * ar_open_archive(), wires the stream into the 7z look-ahead reader and
 * lets SzArEx_Open() parse the archive database.
 *
 * Returns the new archive, or NULL if seeking, allocation or SzArEx_Open()
 * fails. A warning is emitted for every SzArEx_Open() failure other than
 * SZ_ERROR_NO_ARCHIVE.
 */
ar_archive *ar_open_7z_archive(ar_stream *stream)
{
    ar_archive *archive;
    ar_archive_7z *sz;
    SRes status;

    if (!ar_seek(stream, 0, SEEK_SET))
        return NULL;

    archive = ar_open_archive(stream, sizeof(ar_archive_7z), _7z_close, _7z_parse_entry,
                              _7z_get_name, _7z_uncompress, NULL, 0);
    if (!archive)
        return NULL;
    sz = (ar_archive_7z *)archive;

    /* chain: ar_stream -> CSeekStream -> LookToRead */
    CSeekStream_CreateVTable(&sz->in_stream, stream);
    LookToRead_CreateVTable(&sz->look_stream, False);
    sz->look_stream.realStream = &sz->in_stream.super;
    LookToRead_Init(&sz->look_stream);

#ifdef USE_7Z_CRC32
    CrcGenerateTable();
#endif

    SzArEx_Init(&sz->data);
    status = SzArEx_Open(&sz->data, &sz->look_stream.s, &gSzAlloc, &gSzAlloc);
    if (status == SZ_OK)
        return archive;

    if (status != SZ_ERROR_NO_ARCHIVE)
        warn("Invalid 7z archive (failed with error %d)", status);
    free(archive);
    return NULL;
}
/*
 * Positions the stream at the given absolute offset and parses the entry
 * found there through the archive's parse_entry callback.
 *
 * Before the callback runs, entry_offset is reset to 0 and
 * entry_size_block is set to the requested offset.
 * NOTE(review): presumably this makes callbacks that continue at
 * entry_offset + entry_size_block start at `offset` - confirm per format.
 *
 * Returns false if the seek fails, otherwise the callback's result.
 */
bool ar_parse_entry_at(ar_archive *ar, size_t offset)
{
    if (ar_seek(ar->stream, offset, SEEK_SET)) {
        ar->entry_offset = 0;
        ar->entry_size_block = offset;
        return ar->parse_entry(ar);
    }
    return false;
}
/*
 * Seek callback that forwards a 7z seek request to the wrapped ar_stream.
 *
 * p      - the struct CSeekStream the callback belongs to
 * pos    - in: offset to seek to; out: resulting position from ar_tell()
 * origin - seek origin, passed to ar_seek() as an int
 *
 * Returns SZ_OK on success and SZ_ERROR_FAIL if the seek fails (in which
 * case *pos is left untouched).
 */
static SRes CSeekStream_Seek(void *p, Int64 *pos, ESzSeek origin)
{
    struct CSeekStream *self = p;
    SRes status = SZ_ERROR_FAIL;

    if (ar_seek(self->stream, *pos, (int)origin)) {
        *pos = ar_tell(self->stream);
        status = SZ_OK;
    }
    return status;
}
/*
 * Moves the stream to the first byte of the current entry's data.
 *
 * The local file header at zip->entry.offset is parsed and compared
 * against the entry values already stored in `zip`. A mismatching
 * compression method or DOS timestamp only triggers a warning; the local
 * header's value is adopted when the stored one is zero (for the
 * timestamp, entry_filetime is refreshed as well).
 *
 * Returns false if seeking or parsing fails, otherwise the result of the
 * final seek past the fixed header, the name and the extra field.
 */
bool zip_seek_to_compressed_data(ar_archive_zip *zip)
{
    struct zip_entry local;

    if (!ar_seek(zip->super.stream, zip->entry.offset, SEEK_SET) ||
        !zip_parse_local_file_entry(zip, &local))
        return false;

    if (zip->entry.method != local.method) {
        warn("Compression methods don't match: %d != %d", zip->entry.method, local.method);
        if (!zip->entry.method)
            zip->entry.method = local.method;
    }

    if (zip->entry.dosdate != local.dosdate) {
        warn("Timestamps don't match");
        if (!zip->entry.dosdate) {
            zip->entry.dosdate = local.dosdate;
            zip->super.entry_filetime = ar_conv_dosdate_to_filetime(zip->entry.dosdate);
        }
    }

    /* data starts right after the variable-length name and extra field */
    return ar_seek(zip->super.stream,
                   zip->entry.offset + ZIP_LOCAL_ENTRY_FIXED_SIZE + local.namelen + local.extralen,
                   SEEK_SET);
}
/*
 * Walks all eocd->numentries central directory entries, starting at
 * eocd->dir_offset, and reports where the last one ends.
 *
 * Each entry must start with SIG_CENTRAL_DIRECTORY; its three 16-bit
 * length fields at bytes 28, 30 and 32 give the number of variable-length
 * bytes to skip after the fixed-size part.
 *
 * Returns the stream position after the last entry, or -1 if a seek, read
 * or skip fails or a signature doesn't match.
 */
off64_t zip_find_end_of_last_directory_entry(ar_stream *stream, struct zip_eocd64 *eocd)
{
    uint8_t fixed[ZIP_DIR_ENTRY_FIXED_SIZE];
    uint64_t seen = 0;

    if (!ar_seek(stream, eocd->dir_offset, SEEK_SET))
        return -1;

    while (seen < eocd->numentries) {
        if (ar_read(stream, fixed, sizeof(fixed)) != sizeof(fixed))
            return -1;
        if (uint32le(fixed + 0) != SIG_CENTRAL_DIRECTORY)
            return -1;
        /* skip the variable-length tail described by the three length fields */
        if (!ar_skip(stream, uint16le(fixed + 28) + uint16le(fixed + 30) + uint16le(fixed + 32)))
            return -1;
        seen++;
    }

    return ar_tell(stream);
}
/*
 * Opens the given stream as a RAR archive.
 *
 * The first FILE_SIGNATURE_SIZE bytes must equal "Rar!\x1A\x07\x00".
 * For a few recognizable non-matching signatures a warning explains why
 * the file is rejected.
 *
 * Returns the archive created by ar_open_archive(), or NULL if the stream
 * can't be read or the signature doesn't match.
 */
ar_archive *ar_open_rar_archive(ar_stream *stream)
{
    char magic[FILE_SIGNATURE_SIZE];

    if (!ar_seek(stream, 0, SEEK_SET))
        return NULL;
    if (ar_read(stream, magic, sizeof(magic)) != sizeof(magic))
        return NULL;

    if (memcmp(magic, "Rar!\x1A\x07\x00", sizeof(magic)) == 0) {
        return ar_open_archive(stream, sizeof(ar_archive_rar), rar_close, rar_parse_entry,
                               rar_get_name, rar_uncompress, NULL, FILE_SIGNATURE_SIZE);
    }

    /* not a supported archive: tell the user why where that is possible */
    if (memcmp(magic, "Rar!\x1A\x07\x01", sizeof(magic)) == 0)
        warn("RAR 5 format isn't supported");
    else if (memcmp(magic, "RE~^", 4) == 0)
        warn("Ancient RAR format isn't supported");
    else if (memcmp(magic, "MZ", 2) == 0 || memcmp(magic, "\x7F\x45LF", 4) == 0)
        warn("SFX archives aren't supported");

    return NULL;
}
/*
 * Opens the given stream as a TAR archive.
 *
 * The archive object is allocated first and then validated by parsing the
 * header at the start of the stream; a header that fails to parse or has
 * a zero checksum rejects the stream.
 *
 * Returns the new archive, or NULL if seeking, allocation or validation
 * fails.
 */
ar_archive *ar_open_tar_archive(ar_stream *stream)
{
    ar_archive *archive;
    ar_archive_tar *tar;

    if (!ar_seek(stream, 0, SEEK_SET))
        return NULL;

    archive = ar_open_archive(stream, sizeof(ar_archive_tar), tar_close, tar_parse_entry,
                              tar_get_name, tar_uncompress, NULL, 0);
    if (!archive)
        return NULL;

    tar = (ar_archive_tar *)archive;
    if (tar_parse_header(tar) && tar->entry.checksum)
        return archive;

    free(archive);
    return NULL;
}
/*
 * Scans forward from `offset` for the next local file header signature.
 *
 * The stream is read in chunks of up to 512 bytes. The last 4 bytes of
 * every chunk are carried over to the front of the next one so that a
 * signature straddling a chunk boundary is still found.
 *
 * Returns the absolute offset of the signature, or -1 if the seek fails
 * or no signature is found before fewer than ZIP_LOCAL_ENTRY_FIXED_SIZE
 * bytes remain.
 */
off64_t zip_find_next_local_file_entry(ar_stream *stream, off64_t offset)
{
    uint8_t data[512];
    int count, i;

    /* -1 is the error sentinel; 0 (false) would be a valid offset */
    if (!ar_seek(stream, offset, SEEK_SET))
        return -1;
    count = (int)ar_read(stream, data, sizeof(data));

    while (count >= ZIP_LOCAL_ENTRY_FIXED_SIZE) {
        for (i = 0; i < count - 4; i++) {
            if (uint32le(data + i) == SIG_LOCAL_FILE_HEADER)
                return offset + i;
        }
        /* keep only the unchecked 4-byte tail; copying more would read
         * past the end of the buffer */
        memmove(data, data + count - 4, 4);
        offset += count - 4;
        count = (int)ar_read(stream, data + 4, sizeof(data) - 4) + 4;
    }

    return -1;
}
/*
 * Parses the End of Central Directory record at the current stream
 * position into `eocd` and, when a ZIP64 locator precedes it, merges in
 * the values of the ZIP64 End of Central Directory record.
 *
 * ZIP64 values only replace classic fields that hold their all-ones
 * placeholder (UINT16_MAX / UINT32_MAX).
 *
 * Returns true when a usable record was parsed; false on read/seek
 * failures, signature mismatches or archives spanning multiple disks.
 */
bool zip_parse_end_of_central_directory(ar_stream *stream, struct zip_eocd64 *eocd)
{
    uint8_t data[56];

    if (ar_read(stream, data, ZIP_END_OF_CENTRAL_DIR_SIZE) != ZIP_END_OF_CENTRAL_DIR_SIZE)
        return false;

    eocd->signature = uint32le(data + 0);
    eocd->diskno = uint16le(data + 4);
    eocd->diskno_dir = uint16le(data + 6);
    eocd->numentries_disk = uint16le(data + 8);
    eocd->numentries = uint16le(data + 10);
    eocd->dir_size = uint32le(data + 12);
    eocd->dir_offset = uint32le(data + 16);
    eocd->commentlen = uint16le(data + 20);

    if (eocd->signature != SIG_END_OF_CENTRAL_DIRECTORY)
        return false;

    /* try to locate the ZIP64 end of central directory */
    /* step back over the record just read plus the 20-byte locator
     * (42 = 22 + 20, assuming ZIP_END_OF_CENTRAL_DIR_SIZE is 22) */
    if (!ar_skip(stream, -42))
        return eocd->dir_size < 20;
    if (ar_read(stream, data, 20) != 20)
        return false;
    /* no locator: this is a plain (non-ZIP64) archive */
    if (uint32le(data + 0) != SIG_END_OF_CENTRAL_DIRECTORY_64_LOCATOR)
        return true;
    if ((eocd->diskno != UINT16_MAX && uint32le(data + 4) != eocd->diskno) || uint32le(data + 16) != 1) {
        warn("Archive spanning isn't supported");
        return false;
    }
    /* the locator stores the absolute offset of the ZIP64 record at byte 8 */
    if (!ar_seek(stream, (off64_t)uint64le(data + 8), SEEK_SET))
        return false;
    if (ar_read(stream, data, 56) != 56)
        return false;

    /* use data from ZIP64 end of central directory (when necessary) */
    eocd->signature = uint32le(data + 0);
    eocd->version = uint16le(data + 12);
    eocd->min_version = uint16le(data + 14);
    if (eocd->diskno == UINT16_MAX)
        eocd->diskno = uint32le(data + 16);
    if (eocd->diskno_dir == UINT16_MAX)
        eocd->diskno_dir = uint32le(data + 20);
    if (eocd->numentries_disk == UINT16_MAX)
        eocd->numentries_disk = uint64le(data + 24);
    if (eocd->numentries == UINT16_MAX)
        eocd->numentries = uint64le(data + 32);
    if (eocd->dir_size == UINT32_MAX)
        eocd->dir_size = uint64le(data + 40);
    if (eocd->dir_offset == UINT32_MAX)
        eocd->dir_offset = (off64_t)uint64le(data + 48);

    if (eocd->signature != SIG_END_OF_CENTRAL_DIRECTORY_64)
        return false;
    if (eocd->diskno != eocd->diskno_dir || eocd->numentries != eocd->numentries_disk) {
        warn("Archive spanning isn't supported");
        return false;
    }
    /* a record size above 44 means additional data follows the fixed fields */
    if (uint64le(data + 4) > 44)
        log("ZIP64 extensible data sector present @%" PRIi64, ar_tell(stream));

    return true;
}
/*
 * Parses the TAR header at `offset` and makes it the archive's current
 * entry.
 *
 * - A header with a zero checksum marks the end of the archive
 *   (ar->at_eof is set and false is returned).
 * - Directory entries are skipped by recursing into the next entry.
 * - A long-name entry supplies the name for the entry that follows it:
 *   its data is read as the name, the following entry is parsed, and
 *   entry_offset is pointed back at the long-name header.
 * - Unknown entry types are reported but still returned as entries.
 *
 * Returns true when an entry is ready, false on error or end of archive.
 */
static bool tar_parse_entry(ar_archive *ar, off64_t offset)
{
    ar_archive_tar *tar = (ar_archive_tar *)ar;
    char *longname;

    if (!ar_seek(ar->stream, offset, SEEK_SET)) {
        warn("Couldn't seek to offset %" PRIi64, offset);
        return false;
    }
    if (!tar_parse_header(tar)) {
        warn("Invalid tar header data @%" PRIi64, offset);
        return false;
    }
    if (!tar->entry.checksum) {
        ar->at_eof = true;
        return false;
    }

    ar->entry_offset = offset;
    /* header block plus the data rounded up to whole blocks */
    ar->entry_offset_next = offset + TAR_BLOCK_SIZE + (tar->entry.filesize + TAR_BLOCK_SIZE - 1) / TAR_BLOCK_SIZE * TAR_BLOCK_SIZE;
    ar->entry_size_uncompressed = tar->entry.filesize;
    ar->entry_filetime = tar->entry.mtime;
    tar->bytes_done = 0;

    if (tar->entry.filetype == TYPE_FILE || tar->entry.filetype == TYPE_FILE_OLD)
        return true;

    if (tar->entry.filetype == TYPE_DIRECTORY) {
        log("Skipping directory entry \"%s\"", tar_get_name(ar));
        return tar_parse_entry(ar, ar->entry_offset_next);
    }

    if (tar->entry.filetype != TYPE_LONGNAME) {
        warn("Unknown entry type '%c'", tar->entry.filetype);
        return true;
    }

    /* long name: this entry's data is the name of the next entry */
    longname = malloc(tar->entry.filesize + 1);
    if (!longname) {
        log("Falling back to the short filename on OOM");
        return tar_parse_entry(ar, ar->entry_offset_next);
    }
    if (!ar_entry_uncompress(ar, longname, tar->entry.filesize))
        goto discard_name;
    longname[tar->entry.filesize] = '\0';
    if (!tar_parse_entry(ar, ar->entry_offset_next))
        goto discard_name;

    /* report the long-name header as the start of the combined entry */
    ar->entry_offset = offset;
    /* name could be in any encoding, assume UTF-8 or whatever (DOS) */
    if (ar_is_valid_utf8(longname)) {
        tar->entry.name = longname;
    }
    else {
        tar->entry.name = ar_conv_dos_to_utf8(longname);
        free(longname);
    }
    return true;

discard_name:
    free(longname);
    return false;
}
/*
 * Advances the archive to the next RAR file entry.
 *
 * If an entry was parsed before (entry_offset != 0), the stream is first
 * moved to entry_offset + entry_size_block, i.e. past that entry's block.
 * Block headers are then read in a loop:
 *   - TYPE_MAIN_HEADER: archives flagged MHD_PASSWORD are rejected; the
 *     6 reserved bytes (plus 1 more when MHD_ENCRYPTVER is set) are
 *     skipped, the header size is sanity-checked and the flags are kept
 *     in rar->archive_flags.
 *   - TYPE_FILE_ENTRY: the entry header is parsed; directory entries are
 *     skipped; for files entry_size_block and entry_size_uncompressed are
 *     set, the decompressor state is cleared unless solid data from the
 *     previous entry is available and usable, the stream is placed at
 *     entry_offset + header_size, and true is returned.
 *   - TYPE_END_OF_ARCHIVE: at_eof is set and false is returned.
 *   - TYPE_NEWSUB and unknown types are logged and skipped.
 * Skipped blocks are stepped over by seeking to
 * entry_offset + header.size + header.datasize; a position that did not
 * advance is treated as an integer overflow.
 *
 * Header CRCs are checked in DEBUG builds only, and a mismatch only warns.
 *
 * Returns true when a file entry is ready; false on errors and at the end
 * of the archive.
 */
static bool rar_parse_entry(ar_archive *ar) { ar_archive_rar *rar = (ar_archive_rar *)ar; struct rar_header header; struct rar_entry entry; /* without solid data, most/all previous files have to be decompressed again */ bool has_solid_data = rar->super.entry_offset != 0 && rar->uncomp.initialized && rar->progr.data_left == 0; if (rar->super.entry_offset != 0) { if (!ar_seek(rar->super.stream, rar->super.entry_offset + rar->super.entry_size_block, SEEK_SET)) { warn("Couldn't seek to offset %" PRIuPTR, rar->super.entry_offset + rar->super.entry_size_block); return false; } } for (;;) { rar->super.entry_offset = ar_tell(rar->super.stream); rar->super.entry_size_block = 0; rar->super.entry_size_uncompressed = 0; if (!rar_parse_header(&rar->super, &header)) return false; switch (header.type) { case TYPE_MAIN_HEADER: if ((header.flags & MHD_PASSWORD)) { warn("Encrypted archives aren't supported"); return false; } ar_skip(rar->super.stream, 6 /* reserved data */); if ((header.flags & MHD_ENCRYPTVER)) { log("MHD_ENCRYPTVER is set"); ar_skip(rar->super.stream, 1); } if ((header.flags & MHD_COMMENT)) log("MHD_COMMENT is set"); if (ar_tell(rar->super.stream) - rar->super.entry_offset > header.size) { warn("Invalid RAR header size: %" PRIuPTR, header.size); return false; } rar->archive_flags = header.flags; break; case TYPE_FILE_ENTRY: if (!rar_parse_header_entry(rar, &header, &entry)) return false; if ((header.flags & LHD_PASSWORD)) warn("Encrypted entries will fail to uncompress"); if ((header.flags & LHD_DIRECTORY) == LHD_DIRECTORY) { log("Skipping directory entry \"%s\"", rar_get_name(&rar->super)); break; } if ((header.flags & (LHD_SPLIT_BEFORE | LHD_SPLIT_AFTER))) warn("Splitting files isn't really supported"); // TODO: handle multi-part files (only needed for split files)? 
rar->super.entry_size_block = header.size + (size_t)header.datasize; rar->super.entry_size_uncompressed = (size_t)entry.size; if (rar->super.entry_size_block < rar->entry.header_size) { warn("Integer overflow due to overly large data size"); return false; } if (!has_solid_data || !rar->entry.restart_solid || rar->entry.method == METHOD_STORE) rar_clear_uncompress(&rar->uncomp); else rar->entry.restart_solid = false; #ifdef DEBUG // TODO: CRC checks don't always hold (claim in XADRARParser.m @readBlockHeader) if (!rar_check_header_crc(&rar->super)) warn("Invalid header checksum @%" PRIuPTR, rar->super.entry_offset); #endif if (!ar_seek(rar->super.stream, rar->super.entry_offset + rar->entry.header_size, SEEK_SET)) { warn("Couldn't seek to offset %" PRIuPTR, rar->super.entry_offset + rar->entry.header_size); return false; } return true; case TYPE_NEWSUB: log("Skipping newsub header @%" PRIuPTR, rar->super.entry_offset); break; case TYPE_END_OF_ARCHIVE: rar->super.at_eof = true; return false; default: log("Unknown RAR header type %02x", header.type); break; } #ifdef DEBUG // TODO: CRC checks don't always hold (claim in XADRARParser.m @readBlockHeader) if (!rar_check_header_crc(&rar->super)) warn("Invalid header checksum @%" PRIuPTR, rar->super.entry_offset); #endif if (!ar_seek(rar->super.stream, rar->super.entry_offset + header.size + (ptrdiff_t)header.datasize, SEEK_SET)) { warn("Couldn't seek to offset %" PRIu64, rar->super.entry_offset + header.size + header.datasize); return false; } if (ar_tell(rar->super.stream) <= rar->super.entry_offset) { warn("Integer overflow due to overly large data size"); return false; } } }
/*
 * Parses RAR block headers starting at `offset` until a file entry is
 * found and makes that entry the archive's current entry.
 *
 * ar     - the archive; must be an ar_archive_rar
 * offset - absolute stream offset of the block header to start at
 *
 * Non-file blocks (main header, newsub, unknown types) and empty
 * directory entries are skipped by seeking to entry_offset_next.
 *
 * Returns true when a file entry is ready; false on errors and when the
 * end-of-archive block is reached (ar->at_eof is set in that case).
 */
static bool rar_parse_entry(ar_archive *ar, off64_t offset)
{
    ar_archive_rar *rar = (ar_archive_rar *)ar;
    struct rar_header header;
    struct rar_entry entry;
    /* true when the caller jumps somewhere other than right behind the
     * previously parsed block */
    bool out_of_order = offset != ar->entry_offset_next;

    if (!ar_seek(ar->stream, offset, SEEK_SET)) {
        warn("Couldn't seek to offset %" PRIi64, offset);
        return false;
    }

    for (;;) {
        ar->entry_offset = ar_tell(ar->stream);
        ar->entry_size_uncompressed = 0;

        if (!rar_parse_header(ar, &header))
            return false;

        /* the next block follows this block's header and data */
        ar->entry_offset_next = ar->entry_offset + header.size + header.datasize;
        if (ar->entry_offset_next < ar->entry_offset + header.size) {
            warn("Integer overflow due to overly large data size");
            return false;
        }

        switch (header.type) {
        case TYPE_MAIN_HEADER:
            if ((header.flags & MHD_PASSWORD)) {
                warn("Encrypted archives aren't supported");
                return false;
            }
            ar_skip(ar->stream, 6 /* reserved data */);
            if ((header.flags & MHD_ENCRYPTVER)) {
                log("MHD_ENCRYPTVER is set");
                ar_skip(ar->stream, 1);
            }
            if ((header.flags & MHD_COMMENT))
                log("MHD_COMMENT is set");
            /* more bytes consumed than the header claims to have */
            if (ar_tell(ar->stream) - ar->entry_offset > header.size) {
                warn("Invalid RAR header size: %d", header.size);
                return false;
            }
            rar->archive_flags = header.flags;
            break;

        case TYPE_FILE_ENTRY:
            if (!rar_parse_header_entry(rar, &header, &entry))
                return false;
            if ((header.flags & LHD_PASSWORD))
                warn("Encrypted entries will fail to uncompress");
            if ((header.flags & LHD_DIRECTORY) == LHD_DIRECTORY) {
                if (header.datasize == 0) {
                    log("Skipping directory entry \"%s\"", rar_get_name(ar));
                    break;
                }
                /* a directory with data is handled like a file entry below */
                warn("Can't skip directory entries containing data");
            }
            if ((header.flags & (LHD_SPLIT_BEFORE | LHD_SPLIT_AFTER)))
                warn("Splitting files isn't really supported");
            ar->entry_size_uncompressed = (size_t)entry.size;
            ar->entry_filetime = ar_conv_dosdate_to_filetime(entry.dosdate);
            /* start from a clean decompressor state unless this entry
             * continues a solid stream in sequence */
            if (!rar->entry.solid || rar->entry.method == METHOD_STORE || out_of_order) {
                rar_clear_uncompress(&rar->uncomp);
                memset(&rar->solid, 0, sizeof(rar->solid));
            }
            else {
                br_clear_leftover_bits(&rar->uncomp);
            }

            /* a solid entry needs a restart when reached out of order or
             * when the previous part wasn't completed */
            rar->solid.restart = rar->entry.solid && (out_of_order || !rar->solid.part_done);
            /* an entry without uncompressed data counts as done right away */
            rar->solid.part_done = !ar->entry_size_uncompressed;
            rar->progress.data_left = (size_t)header.datasize;
            rar->progress.bytes_done = 0;
            rar->progress.crc = 0;

            /* TODO: CRC checks don't always hold (claim in XADRARParser.m @readBlockHeader) */
            if (!rar_check_header_crc(ar))
                warn("Invalid header checksum @%" PRIi64, ar->entry_offset);
            /* the stream is expected to sit right behind the entry's header */
            if (ar_tell(ar->stream) != ar->entry_offset + rar->entry.header_size) {
                warn("Couldn't seek to offset %" PRIi64, ar->entry_offset + rar->entry.header_size);
                return false;
            }
            return true;

        case TYPE_NEWSUB:
            log("Skipping newsub header @%" PRIi64, ar->entry_offset);
            break;

        case TYPE_END_OF_ARCHIVE:
            ar->at_eof = true;
            return false;

        default:
            log("Unknown RAR header type %02x", header.type);
            break;
        }

        /* only skipped blocks reach this point: verify and step over them */
        /* TODO: CRC checks don't always hold (claim in XADRARParser.m @readBlockHeader) */
        if (!rar_check_header_crc(ar))
            warn("Invalid header checksum @%" PRIi64, ar->entry_offset);
        if (!ar_seek(ar->stream, ar->entry_offset_next, SEEK_SET)) {
            warn("Couldn't seek to offset %" PRIi64, ar->entry_offset_next);
            return false;
        }
    }
}