/*
 * Inspects the END header at p for the ZIP64 marker values, records the
 * outcome in zip64_present so later code can consult it, and returns
 * that same value (non-zero when any marker is present).
 */
static int
haveZIP64(Byte *p) {
    jlong size_field;
    jlong offset_field;
    jlong count_field;

    size_field   = ENDSIZ(p);
    offset_field = ENDOFF(p);
    count_field  = ENDTOT(p);

    /* Any one saturated field is enough to flag the archive as ZIP64. */
    if (size_field == ZIP64_MAGICVAL ||
        offset_field == ZIP64_MAGICVAL ||
        count_field == ZIP64_MAGICCOUNT) {
        zip64_present = 1;
    } else {
        zip64_present = 0;
    }
    return zip64_present;
}
/*
 * Reads zip file central directory. Returns the file position of first
 * CEN header, otherwise returns -1 if an error occurred. If zip->msg != NULL
 * then the error was a zip format error and zip->msg has the error text.
 * Returns 0 when findEND reports position 0 (only an END header present).
 * Always pass in -1 for knownTotal; it's used for a recursive call.
 *
 * On success zip->entries, zip->table, zip->tablelen, zip->total and
 * zip->locpos are filled in. On failure freeCEN(zip) is called before
 * returning.
 */
static jlong
readCEN(jzfile *zip, jint knownTotal)
{
    /* Following are unsigned 32-bit */
    jlong endpos, end64pos, cenpos, cenlen, cenoff;
    /* Following are unsigned 16-bit */
    jint total, tablelen, i, j;
    unsigned char *cenbuf = NULL;
    unsigned char *cenend;
    unsigned char *cp;
#ifdef USE_MMAP
    static jlong pagesize;
    jlong offset;
#endif
    unsigned char endbuf[ENDHDR];
    jint endhdrlen = ENDHDR;
    jzcell *entries;
    jint *table;

    /* Clear previous zip error */
    zip->msg = NULL;

    /* Get position of END header */
    if ((endpos = findEND(zip, endbuf)) == -1)
        return -1; /* no END header or system error */

    if (endpos == 0)
        return 0;  /* only END header present */

    freeCEN(zip);

    /* Get position and length of central directory */
    cenlen = ENDSIZ(endbuf);
    cenoff = ENDOFF(endbuf);
    total  = ENDTOT(endbuf);
    if (cenlen == ZIP64_MAGICVAL || cenoff == ZIP64_MAGICVAL ||
        total == ZIP64_MAGICCOUNT) {
        /* The END fields are saturated: prefer the ZIP64 END record if
         * one can be located; otherwise keep the values read above. */
        unsigned char end64buf[ZIP64_ENDHDR];
        if ((end64pos = findEND64(zip, end64buf, endpos)) != -1) {
            cenlen = ZIP64_ENDSIZ(end64buf);
            cenoff = ZIP64_ENDOFF(end64buf);
            total = (jint)ZIP64_ENDTOT(end64buf);
            endpos = end64pos;
            endhdrlen = ZIP64_ENDHDR;
        }
    }

    if (cenlen > endpos)
        ZIP_FORMAT_ERROR("invalid END header (bad central directory size)");
    cenpos = endpos - cenlen;

    /* Get position of first local file (LOC) header, taking into
     * account that there may be a stub prefixed to the zip file. */
    zip->locpos = cenpos - cenoff;
    if (zip->locpos < 0)
        ZIP_FORMAT_ERROR("invalid END header (bad central directory offset)");

#ifdef USE_MMAP
    if (zip->usemmap) {
        /* On Solaris & Linux prior to JDK 6, we used to mmap the whole jar
         * file to read the jar file contents. However, this greatly
         * increased the perceived footprint numbers because the mmap'ed
         * pages were adding into the totals shown by 'ps' and 'top'. We
         * switched to mmaping only the central directory of jar file while
         * calling 'read' to read the rest of jar file. Here are a list of
         * reasons apart from above of why we are doing so:
         * 1. Greatly reduces mmap overhead after startup complete;
         * 2. Avoids dual path code maintenance;
         * 3. Greatly reduces risk of address space (not virtual memory)
         *    exhaustion.
         */
        if (pagesize == 0) {
            /* sysconf reports failure as -1. Only cache a usable value so
             * that a failed query neither corrupts the offset mask below
             * nor sticks in the static for later calls. */
            jlong queried = (jlong)sysconf(_SC_PAGESIZE);
            if (queried <= 0) goto Catch;
            pagesize = queried;
        }
        if (cenpos > pagesize) {
            offset = cenpos & ~(pagesize - 1);
        } else {
            offset = 0;
        }
        /* When we are not calling recursively, knownTotal is -1. */
        if (knownTotal == -1) {
            void* mappedAddr;
            /* Mmap the CEN and END part only. We have to figure
               out the page size in order to make offset to be multiples of
               page size.
            */
            zip->mlen = cenpos - offset + cenlen + endhdrlen;
            zip->offset = offset;
            mappedAddr = mmap64(0, zip->mlen, PROT_READ, MAP_SHARED, zip->zfd, (off64_t) offset);
            zip->maddr = (mappedAddr == (void*) MAP_FAILED) ? NULL :
                (unsigned char*)mappedAddr;

            if (zip->maddr == NULL) {
                jio_fprintf(stderr, "mmap failed for CEN and END part of zip file\n");
                goto Catch;
            }
        }
        cenbuf = zip->maddr + cenpos - offset;
    } else
#endif
    {
        if ((cenbuf = malloc((size_t) cenlen)) == NULL ||
            (readFullyAt(zip->zfd, cenbuf, cenlen, cenpos) == -1))
            goto Catch;
    }

    cenend = cenbuf + cenlen;

    /* Initialize zip file data structures based on the total number
     * of central directory entries as stored in ENDTOT.  Since this
     * is a 2-byte field, but we (and other zip implementations)
     * support approx. 2**31 entries, we do not trust ENDTOT, but
     * treat it only as a strong hint.  When we call ourselves
     * recursively, knownTotal will have the "true" value.
     *
     * Keep this path alive even with the Zip64 END support added, just
     * for zip files that have more than 0xffff entries but don't have
     * the Zip64 enabled.
     */
    total = (knownTotal != -1) ? knownTotal : total;
    entries  = zip->entries  = calloc(total, sizeof(entries[0]));
    tablelen = zip->tablelen = ((total/2) | 1); /* Odd -> fewer collisions */
    table    = zip->table    = malloc(tablelen * sizeof(table[0]));
    /* ISO C allows calloc to return NULL for a zero-sized request, so a
     * NULL 'entries' is only an allocation failure when total is non-zero.
     * 'table' needs no such exception: tablelen is forced odd above and
     * therefore is never zero. */
    if ((entries == NULL && total != 0) || table == NULL) goto Catch;
    for (j = 0; j < tablelen; j++)
        table[j] = ZIP_ENDCHAIN;

    /* Iterate through the entries in the central directory */
    for (i = 0, cp = cenbuf; cp <= cenend - CENHDR; i++, cp += CENSIZE(cp)) {
        /* Following are unsigned 16-bit */
        jint method, nlen;
        unsigned int hsh;

        if (i >= total) {
            /* This will only happen if the zip file has an incorrect
             * ENDTOT field, which usually means it contains more than
             * 65535 entries. Start over with the counted number of
             * headers; the recursive call's result becomes ours. */
            cenpos = readCEN(zip, countCENHeaders(cenbuf, cenend));
            goto Finally;
        }

        method = CENHOW(cp);
        nlen   = CENNAM(cp);

        if (GETSIG(cp) != CENSIG)
            ZIP_FORMAT_ERROR("invalid CEN header (bad signature)");
        if (CENFLG(cp) & 1)
            ZIP_FORMAT_ERROR("invalid CEN header (encrypted entry)");
        if (method != STORED && method != DEFLATED)
            ZIP_FORMAT_ERROR("invalid CEN header (bad compression method)");
        if (cp + CENHDR + nlen > cenend)
            ZIP_FORMAT_ERROR("invalid CEN header (bad header size)");

        /* if the entry is metadata add it to our metadata names */
        if (isMetaName((char *)cp+CENHDR, nlen))
            if (addMetaName(zip, (char *)cp+CENHDR, nlen) != 0)
                goto Catch;

        /* Record the CEN offset and the name hash in our hash cell. */
        entries[i].cenpos = cenpos + (cp - cenbuf);
        entries[i].hash = hashN((char *)cp+CENHDR, nlen);

        /* Add the entry to the hash table (new entry heads its chain) */
        hsh = entries[i].hash % tablelen;
        entries[i].next = table[hsh];
        table[hsh] = i;
    }
    /* The headers must tile the central directory exactly. */
    if (cp != cenend)
        ZIP_FORMAT_ERROR("invalid CEN header (bad header size)");

    zip->total = i;
    goto Finally;

 Catch:
    freeCEN(zip);
    cenpos = -1;

 Finally:
    /* cenbuf is heap memory only on the non-mmap path. */
#ifdef USE_MMAP
    if (!zip->usemmap)
#endif
        free(cenbuf);

    return cenpos;
}
bool JarFileParser::find_end_of_central_header() { DECLARE_STATIC_BUFFER(unsigned char, buffer, TMPBUFFERSIZE); BufferedFile::Raw bf = buffered_file(); /* Get the length of the file */ const jint length = (int) bf().file_size(); /* Calculate the smallest possible offset for the end header. It * can be at most 0xFFFF + ENDHDRSIZ bytes from the end of the file, but * the file must also have a local header and a central header */ jint minOffset = length - (0xFFFF + ENDHDRSIZ); if (minOffset < LOCHDRSIZ + CENHDRSIZ) { minOffset = LOCHDRSIZ + CENHDRSIZ; } /* We assume that "buffer" contains the contents * of part of the file. currentOffset contains the offset of buffer[0]. */ /* Read in the last ENDHDRSIZ bytes into the buffer. 99% of the time, * the file won't have a comment, and this is the only read we'll need */ if ( (bf().seek(-ENDHDRSIZ, SEEK_END) < 0) || (bf().get_bytes(buffer, ENDHDRSIZ) != ENDHDRSIZ)) { return false; } /* Set currentOffset to be the offset of buffer[0] */ jint currentOffset = length - ENDHDRSIZ; /* Set bp to be the location at which to start looking */ unsigned const char* bp = buffer; for (;;) { /* "buffer" contains a block of data from the file, starting at * currentOffset "position" in the file. * We investigate whether currentOffset + (bp - buffer) is the start * of the end header in the zip file. * * We use a simplified version of Knuth-Morris-Pratt search algorithm. * The header we're looking for is 'P' 'K' 5 6 */ switch(bp[0]) { case '\006': /* The header must start at least 3 bytes back */ bp -= 3; break; case '\005': /* The header must start at least 2 bytes back */ bp -= 2; break; case 'K': /* The header must start at least 1 byte back */ bp -= 1; break; case 'P': /* Either this is the header, or the header must * start at least 4 back */ if (bp[1] == 'K' && bp[2] == 5 && bp[3] == 6) { /* We have what may be a header. Let's make sure the * implied length of the jar file matches the actual * length. 
*/ int endpos = (int) currentOffset + (bp - buffer); if (endpos + ENDHDRSIZ + ENDCOM(bp) == length) { juint cenOffset = endpos - ENDSIZ(bp); juint locOffset = cenOffset - ENDOFF(bp); unsigned char sig[4]; if (bf().seek(locOffset, SEEK_SET) >= 0 && bf().get_bytes(sig, 4) == 4 && sig[0] == (unsigned char)'P' && sig[1] == (unsigned char)'K' && sig[2] == (unsigned char) 3 && sig[3] == (unsigned char) 4) { raw_current_entry()->cenOffset = cenOffset; raw_current_entry()->nextCenOffset = cenOffset; raw_current_entry()->locOffset = locOffset; #if ENABLE_ROM_GENERATOR raw_current_entry()->totalEntryCount = ENDTOT(bp); #endif } return true; // Found central header } } /* FALL THROUGH */ default: /* The header must start at least four characters back, since * the current character isn't in the header */ bp -= 4; } if (bp < buffer) { /* We've moved outside our window into the file. We must * move the window backwards */ size_t count = (size_t) (currentOffset - minOffset); /* Bytes left in file */ if (((jint)count) <= 0) { /* Nothing left to read. Time to give up */ return false; } else { /* up to ((bp - buffer) + ENDHDRSIZ) bytes in the buffer might * still be part of the end header, so the most bytes we can * actually read are * TMPBUFFERSIZE - ((bp - buffer) + ENDHDRSIZE). */ size_t available = (TMPBUFFERSIZE - ENDHDRSIZ) + (buffer - bp); if (count > available) { count = available; } } /* Back up, while keeping our virtual currentOffset the same */ currentOffset -= count; bp += count; jvm_memmove(buffer + count, buffer, TMPBUFFERSIZE - count); if ( bf().seek(currentOffset, SEEK_SET) < 0 || bf().get_bytes(buffer, count) != size_t(count) ) { return false; } } } /* end of for loop */ }