/* * Allow each registered format to bid on whether it wants to handle * the next entry. Return index of winning bidder. */ static int choose_format(struct archive_read *a) { int slots; int i; int bid, best_bid; int best_bid_slot; slots = sizeof(a->formats) / sizeof(a->formats[0]); best_bid = -1; best_bid_slot = -1; /* Set up a->format for convenience of bidders. */ a->format = &(a->formats[0]); for (i = 0; i < slots; i++, a->format++) { if (a->format->bid) { bid = (a->format->bid)(a, best_bid); if (bid == ARCHIVE_FATAL) return (ARCHIVE_FATAL); if (a->filter->position != 0) __archive_read_seek(a, 0, SEEK_SET); if ((bid > best_bid) || (best_bid_slot < 0)) { best_bid = bid; best_bid_slot = i; } } } /* * There were no bidders; this is a serious programmer error * and demands a quick and definitive abort. */ if (best_bid_slot < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "No formats registered"); return (ARCHIVE_FATAL); } /* * There were bidders, but no non-zero bids; this means we * can't support this stream. */ if (best_bid < 1) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unrecognized archive format"); return (ARCHIVE_FATAL); } return (best_bid_slot); }
/*
 * Read the ZIP central directory and index its entries.
 *
 * Seeks to the (SFX-corrected) central directory offset, then walks
 * zip->central_directory_entries file-header records.  Each record has
 * a 46-byte fixed part followed by the file name, the extra field and
 * the comment.  The fixed part is decoded into the matching slot of
 * zip->zip_entries, and the entry is inserted into zip->tree, or into
 * zip->tree_rsrc when it lives under "__MACOSX/" and is a resource
 * fork or a directory.
 *
 * Returns ARCHIVE_FATAL when a record cannot be read ahead or does not
 * start with the central directory signature.
 *
 * NOTE(review): the end of this function (whatever follows the loop,
 * including the success return) is not visible in this excerpt.
 */
static int slurp_central_directory(struct archive_read *a, struct zip *zip) {
	unsigned i;
	int64_t correction;
	static const struct archive_rb_tree_ops rb_ops = { &cmp_node, &cmp_key };
	static const struct archive_rb_tree_ops rb_rsrc_ops = { &rsrc_cmp_node, &rsrc_cmp_key };

	/*
	 * The archive file we are reading may be a self-extractor (SFX).
	 * The gap between where the end-of-central-directory record was
	 * found and where the central directory claims to end is taken
	 * as the size of the SFX header, and is used to revise the ZIP
	 * header offsets.
	 */
	correction = zip->end_of_central_directory_offset - (zip->central_directory_offset + zip->central_directory_size);

	/*
	 * The stored central directory offset is a relative value, so
	 * it is revised by the same amount.
	 */
	zip->central_directory_offset += correction;

	/* NOTE(review): the result of this seek is not checked here. */
	__archive_read_seek(a, zip->central_directory_offset, SEEK_SET);
	zip->offset = zip->central_directory_offset;

	__archive_rb_tree_init(&zip->tree, &rb_ops);
	__archive_rb_tree_init(&zip->tree_rsrc, &rb_rsrc_ops);

	/*
	 * One zeroed zip_entry per central directory record.
	 * NOTE(review): the calloc() result is used in the loop below
	 * without a NULL check.
	 */
	zip->zip_entries = calloc(zip->central_directory_entries, sizeof(struct zip_entry));
	for (i = 0; i < zip->central_directory_entries; ++i) {
		struct zip_entry *zip_entry = &zip->zip_entries[i];
		size_t filename_length, extra_length, comment_length;
		uint32_t external_attributes;
		const char *name, *p, *r;

		/* Fixed-size part of the record. */
		if ((p = __archive_read_ahead(a, 46, NULL)) == NULL)
			return ARCHIVE_FATAL;
		if (memcmp(p, "PK\001\002", 4) != 0) {
			archive_set_error(&a->archive, -1, "Invalid central directory signature");
			return ARCHIVE_FATAL;
		}
		zip->have_central_directory = 1;
		/* Not used: version = p[4]; */
		zip_entry->system = p[5];
		/* Not used: version_required = archive_le16dec(p + 6); */
		zip_entry->flags = archive_le16dec(p + 8);
		/* The 16-bit compression field is narrowed to char here. */
		zip_entry->compression = (char)archive_le16dec(p + 10);
		zip_entry->mtime = zip_time(p + 12);
		zip_entry->crc32 = archive_le32dec(p + 16);
		/* Both sizes are decoded as 32-bit little-endian fields. */
		zip_entry->compressed_size = archive_le32dec(p + 20);
		zip_entry->uncompressed_size = archive_le32dec(p + 24);
		filename_length = archive_le16dec(p + 28);
		extra_length = archive_le16dec(p + 30);
		comment_length = archive_le16dec(p + 32);
		/* Not used: disk_start = archive_le16dec(p + 34); */
		/* (disk_start had better be zero.) */
		/* Not used: internal_attributes = archive_le16dec(p + 36); */
		/* (internal_attributes carries the text bit.) */
		external_attributes = archive_le32dec(p + 38);
		/* Local header offsets get the same SFX correction. */
		zip_entry->local_header_offset = archive_le32dec(p + 42) + correction;

		/*
		 * If we can't guess the mode, leave it zero here; when we
		 * read the local file header we might get more information.
		 * For system 3 the mode is the upper 16 bits of the
		 * external attributes.
		 * NOTE(review): presumably system 3 means Unix; confirm
		 * against the ZIP specification.
		 */
		zip_entry->mode = 0;
		if (zip_entry->system == 3) {
			zip_entry->mode = external_attributes >> 16;
		}

		/*
		 * Mac resource fork files are stored under the
		 * "__MACOSX/" directory, so check whether this entry
		 * is one of them.
		 */
		/* Make sure we have the file name. */
		if ((p = __archive_read_ahead(a, 46 + filename_length, NULL)) == NULL)
			return ARCHIVE_FATAL;
		name = p + 46;
		/*
		 * NOTE(review): r is used below as a pointer into name;
		 * presumably it marks where the basename starts.  Confirm
		 * against rsrc_basename().
		 */
		r = rsrc_basename(name, filename_length);
		if (filename_length >= 9 &&
		    strncmp("__MACOSX/", name, 9) == 0) {
			/*
			 * An entry that is neither a directory (trailing
			 * '/') nor a "._"-prefixed resource fork is
			 * treated as an ordinary file so that it is
			 * exposed.
			 */
			if (name[filename_length-1] != '/' &&
			    (r - name < 3 || r[0] != '.' || r[1] != '_')) {
				__archive_rb_tree_insert_node(&zip->tree, &zip_entry->node);
				/* Expose its parent directories. */
				expose_parent_dirs(zip, name, filename_length);
			} else {
				/*
				 * This is a resource fork file or a
				 * directory: keep its full name and file
				 * it in the resource tree.
				 */
				archive_strncpy(&(zip_entry->rsrcname), name, filename_length);
				__archive_rb_tree_insert_node(&zip->tree_rsrc, &zip_entry->node);
			}
		} else {
			/*
			 * Generate the resource fork name used to find
			 * this entry's resource file in zip->tree_rsrc:
			 * "__MACOSX/" + the part of name before r + "._"
			 * + the part of name from r onward.
			 */
			archive_strcpy(&(zip_entry->rsrcname), "__MACOSX/");
			archive_strncat(&(zip_entry->rsrcname), name, r - name);
			archive_strcat(&(zip_entry->rsrcname), "._");
			archive_strncat(&(zip_entry->rsrcname), name + (r - name), filename_length - (r - name));
			/*
			 * Register the entry in the RB tree so that
			 * entries are sorted by file offset.
			 */
			__archive_rb_tree_insert_node(&zip->tree, &zip_entry->node);
		}

		/*
		 * We don't read the filename until we get to the local
		 * file header.  Reading it here would speed up
		 * table-of-contents operations (removing the need to find
		 * and read the local file header to get the filename) at
		 * the cost of requiring a lot of extra space.
		 */
		/*
		 * We don't read the extra block here.  We assume it will
		 * be duplicated at the local file header.
		 */
		/* Consume the fixed part plus name, extra field and comment. */
		__archive_read_consume(a, 46 + filename_length + extra_length + comment_length);
	}
/* * TODO: This is a performance sink because it forces the read core to * drop buffered data from the start of file, which will then have to * be re-read again if this bidder loses. * * We workaround this a little by passing in the best bid so far so * that later bidders can do nothing if they know they'll never * outbid. But we can certainly do better... */ static int archive_read_format_zip_seekable_bid(struct archive_read *a, int best_bid) { struct zip *zip = (struct zip *)a->format->data; int64_t filesize; const char *p; /* If someone has already bid more than 32, then avoid trashing the look-ahead buffers with a seek. */ if (best_bid > 32) return (-1); filesize = __archive_read_seek(a, -22, SEEK_END); /* If we can't seek, then we can't bid. */ if (filesize <= 0) return 0; /* TODO: More robust search for end of central directory record. */ if ((p = __archive_read_ahead(a, 22, NULL)) == NULL) return 0; /* First four bytes are signature for end of central directory record. Four zero bytes ensure this isn't a multi-volume Zip file (which we don't yet support). */ if (memcmp(p, "PK\005\006\000\000\000\000", 8) != 0) { int64_t i, tail; int found; /* * If there is a comment in end of central directory * record, 22 bytes are too short. we have to read more * to properly detect the record. Hopefully, a length * of the comment is not longer than 16362 bytes(16K-22). 
*/ if (filesize + 22 > 1024 * 16) { tail = 1024 * 16; filesize = __archive_read_seek(a, tail * -1, SEEK_END); } else { tail = filesize + 22; filesize = __archive_read_seek(a, 0, SEEK_SET); } if (filesize < 0) return 0; if ((p = __archive_read_ahead(a, (size_t)tail, NULL)) == NULL) return 0; for (found = 0, i = 0;!found && i < tail - 22;) { switch (p[i]) { case 'P': if (memcmp(p+i, "PK\005\006\000\000\000\000", 8) == 0) { p += i; filesize += tail - (22 + archive_le16dec(p+20)); found = 1; } else i += 8; break; case 'K': i += 7; break; case 005: i += 6; break; case 006: i += 5; break; default: i += 1; break; } } if (!found) return 0; } /* Since we've already done the hard work of finding the end of central directory record, let's save the important information. */ zip->central_directory_entries = archive_le16dec(p + 10); zip->central_directory_size = archive_le32dec(p + 12); zip->central_directory_offset = archive_le32dec(p + 16); zip->end_of_central_directory_offset = filesize; /* Just one volume, so central dir must all be on this volume. */ if (zip->central_directory_entries != archive_le16dec(p + 8)) return 0; /* Central directory can't extend beyond end of this file. */ if (zip->central_directory_offset + (int64_t)zip->central_directory_size > filesize) return 0; /* This is just a tiny bit higher than the maximum returned by the streaming Zip bidder. This ensures that the more accurate seeking Zip parser wins whenever seek is available. */ return 32; }