/*
 * Reads zip file central directory. Returns the file position of first
 * CEN header, otherwise returns -1 if an error occurred. If zip->msg != NULL
 * then the error was a zip format error and zip->msg has the error text.
 * Always pass in -1 for knownTotal; it's used for a recursive call.
 *
 * Parameters:
 *   zip        - the zip file being opened; its msg, locpos, entries, table,
 *                tablelen and total fields (and, when mmap is in use, mlen,
 *                offset and maddr) are written here.
 *   knownTotal - -1 on the initial call; on the recursive call it carries
 *                the entry count obtained from countCENHeaders().
 *
 * Returns:
 *   0 if findEND() reports position 0 (only an END header is present),
 *   -1 on any error, otherwise the file position of the first CEN header.
 */
static jlong
readCEN(jzfile *zip, jint knownTotal)
{
    /* Following hold unsigned 32-bit values (wider when Zip64 is used) */
    jlong endpos, end64pos, cenpos, cenlen, cenoff;
    /* Following hold unsigned 16-bit values (wider when Zip64 is used) */
    jint total, tablelen, i, j;
    unsigned char *cenbuf = NULL;
    unsigned char *cenend;
    unsigned char *cp;
#ifdef USE_MMAP
    static jlong pagesize;
    jlong offset;
#endif
    unsigned char endbuf[ENDHDR];
    jint endhdrlen = ENDHDR;
    jzcell *entries;
    jint *table;

    /* Clear previous zip error */
    zip->msg = NULL;

    /* Get position of END header */
    if ((endpos = findEND(zip, endbuf)) == -1)
        return -1; /* no END header or system error */

    if (endpos == 0)
        return 0;  /* only END header present */

    freeCEN(zip);

    /* Get position and length of central directory */
    cenlen = ENDSIZ(endbuf);
    cenoff = ENDOFF(endbuf);
    total  = ENDTOT(endbuf);
    if (cenlen == ZIP64_MAGICVAL || cenoff == ZIP64_MAGICVAL ||
        total == ZIP64_MAGICCOUNT) {
        /* One of the END fields is saturated: look for a Zip64 END record
         * and, if one is found, take the real values from it instead. */
        unsigned char end64buf[ZIP64_ENDHDR];
        if ((end64pos = findEND64(zip, end64buf, endpos)) != -1) {
            cenlen = ZIP64_ENDSIZ(end64buf);
            cenoff = ZIP64_ENDOFF(end64buf);
            total = (jint)ZIP64_ENDTOT(end64buf);
            endpos = end64pos;
            endhdrlen = ZIP64_ENDHDR;
        }
    }

    if (cenlen > endpos) {
        ZIP_FORMAT_ERROR("invalid END header (bad central directory size)");
    }
    cenpos = endpos - cenlen;

    /* Get position of first local file (LOC) header, taking into
     * account that there may be a stub prefixed to the zip file. */
    zip->locpos = cenpos - cenoff;
    if (zip->locpos < 0) {
        ZIP_FORMAT_ERROR("invalid END header (bad central directory offset)");
    }

#ifdef USE_MMAP
    if (zip->usemmap) {
        /* On Solaris & Linux prior to JDK 6, we used to mmap the whole jar
         * file to read the jar file contents. However, this greatly
         * increased the perceived footprint numbers because the mmap'ed
         * pages were adding into the totals shown by 'ps' and 'top'. We
         * switched to mmaping only the central directory of jar file while
         * calling 'read' to read the rest of jar file. Here are a list of
         * reasons apart from above of why we are doing so:
         * 1. Greatly reduces mmap overhead after startup complete;
         * 2. Avoids dual path code maintenance;
         * 3. Greatly reduces risk of address space (not virtual memory)
         *    exhaustion.
         */
        if (pagesize <= 0) {
            pagesize = (jlong)sysconf(_SC_PAGESIZE);
            /* sysconf() reports failure as -1, so reject any non-positive
             * value rather than only 0; a bad value must never be used as
             * an alignment mask below. */
            if (pagesize <= 0)
                goto Catch;
        }
        if (cenpos > pagesize) {
            /* Round cenpos down to a page boundary */
            offset = cenpos & ~(pagesize - 1);
        } else {
            offset = 0;
        }
        /* When we are not calling recursively, knownTotal is -1. */
        if (knownTotal == -1) {
            void *mappedAddr;
            /* Mmap the CEN and END part only. We have to figure
               out the page size in order to make offset to be multiples of
               page size.
            */
            zip->mlen = cenpos - offset + cenlen + endhdrlen;
            zip->offset = offset;
            mappedAddr = mmap64(0, zip->mlen, PROT_READ, MAP_SHARED,
                                zip->zfd, (off64_t) offset);
            zip->maddr = (mappedAddr == (void*) MAP_FAILED)
                ? NULL
                : (unsigned char*)mappedAddr;
            if (zip->maddr == NULL) {
                jio_fprintf(stderr, "mmap failed for CEN and END part of zip file\n");
                goto Catch;
            }
        }
        cenbuf = zip->maddr + cenpos - offset;
    } else
#endif
    {
        if ((cenbuf = malloc((size_t) cenlen)) == NULL ||
            (readFullyAt(zip->zfd, cenbuf, cenlen, cenpos) == -1))
            goto Catch;
    }

    cenend = cenbuf + cenlen;

    /* Initialize zip file data structures based on the total number
     * of central directory entries as stored in ENDTOT.  Since this
     * is a 2-byte field, but we (and other zip implementations)
     * support approx. 2**31 entries, we do not trust ENDTOT, but
     * treat it only as a strong hint.  When we call ourselves
     * recursively, knownTotal will have the "true" value.
     *
     * Keep this path alive even with the Zip64 END support added, just
     * for zip files that have more than 0xffff entries but don't have
     * the Zip64 enabled.
     */
    total = (knownTotal != -1) ? knownTotal : total;
    entries  = zip->entries  = calloc(total, sizeof(entries[0]));
    tablelen = zip->tablelen = ((total/2) | 1); /* Odd -> fewer collisions */
    table    = zip->table    = malloc(tablelen * sizeof(table[0]));
    /* ISO C allows calloc/malloc to return NULL for a zero-sized request,
     * so a NULL 'entries' is only an error when total is non-zero.
     * 'tablelen' always has its low bit set (see above), so it is never
     * zero and 'table' needs no such exemption. */
    if ((entries == NULL && total != 0) || table == NULL)
        goto Catch;
    for (j = 0; j < tablelen; j++)
        table[j] = ZIP_ENDCHAIN;

    /* Iterate through the entries in the central directory */
    for (i = 0, cp = cenbuf; cp <= cenend - CENHDR; i++, cp += CENSIZE(cp)) {
        /* Following are unsigned 16-bit */
        jint method, nlen;
        unsigned int hsh;

        if (i >= total) {
            /* This will only happen if the zip file has an incorrect
             * ENDTOT field, which usually means it contains more than
             * 65535 entries. */
            cenpos = readCEN(zip, countCENHeaders(cenbuf, cenend));
            goto Finally;
        }

        method = CENHOW(cp);
        nlen   = CENNAM(cp);

        if (GETSIG(cp) != CENSIG) {
            ZIP_FORMAT_ERROR("invalid CEN header (bad signature)");
        }
        if (CENFLG(cp) & 1) {
            ZIP_FORMAT_ERROR("invalid CEN header (encrypted entry)");
        }
        if (method != STORED && method != DEFLATED) {
            ZIP_FORMAT_ERROR("invalid CEN header (bad compression method)");
        }
        if (cp + CENHDR + nlen > cenend) {
            ZIP_FORMAT_ERROR("invalid CEN header (bad header size)");
        }

        /* if the entry is metadata add it to our metadata names */
        if (isMetaName((char *)cp+CENHDR, nlen))
            if (addMetaName(zip, (char *)cp+CENHDR, nlen) != 0)
                goto Catch;

        /* Record the CEN offset and the name hash in our hash cell. */
        entries[i].cenpos = cenpos + (cp - cenbuf);
        entries[i].hash = hashN((char *)cp+CENHDR, nlen);

        /* Add the entry to the hash table (push onto the bucket's chain) */
        hsh = entries[i].hash % tablelen;
        entries[i].next = table[hsh];
        table[hsh] = i;
    }
    /* The headers must tile the central directory exactly */
    if (cp != cenend) {
        ZIP_FORMAT_ERROR("invalid CEN header (bad header size)");
    }

    zip->total = i;
    goto Finally;

 Catch:
    freeCEN(zip);
    cenpos = -1;

 Finally:
    /* cenbuf is heap-allocated only on the non-mmap path; on the mmap path
     * it points into zip->maddr and must not be freed. */
#ifdef USE_MMAP
    if (!zip->usemmap)
#endif
        free(cenbuf);

    return cenpos;
}
/*
 * Computes the base offset of the zip data within the file and positions
 * the file descriptor at the start of the central directory (the first
 * CEN header).
 *
 * Parameters:
 *   fd - file descriptor of the zip/jar file; its file position is changed.
 *   bp - buffer handed to find_end(); afterwards it is parsed here as the
 *        END header, or as the Zip64 locator when zip64_present is set.
 *
 * Returns:
 *   The base offset, i.e. the number of bytes of extra data that precede
 *   the zip contents in the file (0 in the common case of nothing being
 *   prepended), or -1 if the END header cannot be found, a seek or read
 *   fails, or the Zip64 end record is truncated or malformed.
 */
static jlong
compute_cen(int fd, Byte *bp)
{
    int bytes;
    Byte *p;
    jlong base_offset;
    jlong offset;
    char buffer[MINREAD];

    /*
     * Read the END Header, which is the starting point for ZIP files.
     * (Clearly designed to make writing a zip file easier than reading
     * one. Now isn't that precious...)
     */
    if ((base_offset = find_end(fd, bp)) == -1) {
        return (-1);
    }
    p = bp;

    /*
     * There is a historical, but undocumented, ability to allow for
     * additional "stuff" to be prepended to the zip/jar file. It seems
     * that this has been used to prepend an actual java launcher
     * executable to the jar on Windows.  Although this is just another
     * form of statically linking a small piece of the JVM to the
     * application, we choose to continue to support it.  Note that no
     * guarantees have been made (or should be made) to the customer that
     * this will continue to work.
     *
     * Therefore, calculate the base offset of the zip file (within the
     * expanded file) by assuming that the central directory is followed
     * immediately by the end record.
     */
    if (zip64_present) {
        /*
         * NOTE(review): unlike the non-Zip64 branch below, the two seeks
         * in this branch use the recorded offsets as-is, without adding
         * base_offset -- confirm whether Zip64 archives with prepended
         * data are expected to work.
         */
        if ((offset = ZIP64_LOCOFF(p)) < (jlong)0) {
            return -1;
        }
        if (JLI_Lseek(fd, offset, SEEK_SET) < (jlong) 0) {
            return (-1);
        }
        if ((bytes = read(fd, buffer, MINREAD)) < 0) {
            return (-1);
        }
        /*
         * A short read would leave part of the Zip64 end record
         * uninitialized; refuse to parse stack garbage.
         */
        if (bytes < ZIP64_ENDHDR) {
            return -1;
        }
        if (GETSIG(buffer) != ZIP64_ENDSIG) {
            return -1;
        }
        if ((offset = ZIP64_ENDOFF(buffer)) < (jlong)0) {
            return -1;
        }
        if (JLI_Lseek(fd, offset, SEEK_SET) < (jlong)0) {
            return (-1);
        }
        p = (Byte *)buffer;
        base_offset = base_offset - ZIP64_ENDSIZ(p) - ZIP64_ENDOFF(p) - ZIP64_ENDHDR;
    } else {
        base_offset = base_offset - ENDSIZ(p) - ENDOFF(p);
        /*
         * The END Header indicates the start of the Central Directory
         * Headers. Remember that the desired Central Directory Header (CEN)
         * will almost always be the second one and the first one is a small
         * directory entry ("META-INF/"). Keep the code optimized for
         * that case.
         *
         * Seek to the beginning of the Central Directory.
         */
        if (JLI_Lseek(fd, base_offset + ENDOFF(p), SEEK_SET) < (jlong) 0) {
            return (-1);
        }
    }
    return base_offset;
}