/*
 * Inspects the END header at p for any of the ZIP64 marker values and
 * records the outcome in zip64_present so later code can consult it.
 *
 * The archive is flagged as ZIP64 when the central directory size or
 * offset equals ZIP64_MAGICVAL, or when the entry count equals
 * ZIP64_MAGICCOUNT.
 *
 * Returns the value stored in zip64_present.
 */
static int
haveZIP64(Byte *p) {
    /* Widen to jlong before comparing, exactly as the fields are read. */
    const jlong dirSize   = ENDSIZ(p);
    const jlong dirOffset = ENDOFF(p);
    const jlong dirCount  = ENDTOT(p);
    int found = 0;

    if (dirSize == ZIP64_MAGICVAL) {
        found = 1;
    } else if (dirOffset == ZIP64_MAGICVAL) {
        found = 1;
    } else if (dirCount == ZIP64_MAGICCOUNT) {
        found = 1;
    }

    zip64_present = found;
    return zip64_present;
}
Exemple #2
0
/*
 * Reads the zip file central directory (CEN) and builds the entry array
 * and name hash table stored in zip->entries / zip->table.
 *
 * Parameters:
 *   zip        - the open zip file whose directory is to be read.
 *   knownTotal - always pass -1; a non-negative value is only used by the
 *                recursive call made when the END header's entry count
 *                turns out to be too small.
 *
 * Returns the file position of the first CEN header, 0 if only an END
 * header is present, or -1 if an error occurred. If zip->msg != NULL
 * after a -1 return then the error was a zip format error and zip->msg
 * has the error text.
 */
static jlong
readCEN(jzfile *zip, jint knownTotal)
{
    /* Following are unsigned 32-bit */
    jlong endpos, end64pos, cenpos, cenlen, cenoff;
    /* Following are unsigned 16-bit */
    jint total, tablelen, i, j;
    unsigned char *cenbuf = NULL;
    unsigned char *cenend;
    unsigned char *cp;
#ifdef USE_MMAP
    static jlong pagesize;
    jlong offset;
#endif
    unsigned char endbuf[ENDHDR];
    jint endhdrlen = ENDHDR;
    jzcell *entries;
    jint *table;

    /* Clear previous zip error */
    zip->msg = NULL;
    /* Get position of END header */
    if ((endpos = findEND(zip, endbuf)) == -1)
        return -1; /* no END header or system error */

    if (endpos == 0) return 0;  /* only END header present */

    freeCEN(zip);
    /* Get position and length of central directory */
    cenlen = ENDSIZ(endbuf);
    cenoff = ENDOFF(endbuf);
    total  = ENDTOT(endbuf);
    if (cenlen == ZIP64_MAGICVAL || cenoff == ZIP64_MAGICVAL ||
        total == ZIP64_MAGICCOUNT) {
        /* Any magic value means the real numbers may live in a ZIP64 END
         * record; if one is found it supersedes the plain END header. */
        unsigned char end64buf[ZIP64_ENDHDR];
        if ((end64pos = findEND64(zip, end64buf, endpos)) != -1) {
            cenlen = ZIP64_ENDSIZ(end64buf);
            cenoff = ZIP64_ENDOFF(end64buf);
            total = (jint)ZIP64_ENDTOT(end64buf);
            endpos = end64pos;
            endhdrlen = ZIP64_ENDHDR;
        }
    }

    if (cenlen > endpos)
        ZIP_FORMAT_ERROR("invalid END header (bad central directory size)");
    cenpos = endpos - cenlen;

    /* Get position of first local file (LOC) header, taking into
     * account that there may be a stub prefixed to the zip file. */
    zip->locpos = cenpos - cenoff;
    if (zip->locpos < 0)
        ZIP_FORMAT_ERROR("invalid END header (bad central directory offset)");

#ifdef USE_MMAP
    if (zip->usemmap) {
      /* On Solaris & Linux prior to JDK 6, we used to mmap the whole jar file to
       * read the jar file contents. However, this greatly increased the perceived
       * footprint numbers because the mmap'ed pages were adding into the totals shown
       * by 'ps' and 'top'. We switched to mmaping only the central directory of jar
       * file while calling 'read' to read the rest of jar file. Here are a list of
       * reasons apart from above of why we are doing so:
       * 1. Greatly reduces mmap overhead after startup complete;
       * 2. Avoids dual path code maintenance;
       * 3. Greatly reduces risk of address space (not virtual memory) exhaustion.
       */
        /* sysconf() reports failure as -1, so treat any non-positive value
         * as "unknown": never use it as an alignment mask, and query again
         * on the next call instead of trusting a cached bad value. */
        if (pagesize <= 0) {
            pagesize = (jlong)sysconf(_SC_PAGESIZE);
            if (pagesize <= 0) goto Catch;
        }
        if (cenpos > pagesize) {
            offset = cenpos & ~(pagesize - 1);
        } else {
            offset = 0;
        }
        /* When we are not calling recursively, knownTotal is -1. */
        if (knownTotal == -1) {
            void* mappedAddr;
            /* Mmap the CEN and END part only. We have to figure
               out the page size in order to make offset to be multiples of
               page size.
            */
            zip->mlen = cenpos - offset + cenlen + endhdrlen;
            zip->offset = offset;
            mappedAddr = mmap64(0, zip->mlen, PROT_READ, MAP_SHARED, zip->zfd, (off64_t) offset);
            zip->maddr = (mappedAddr == (void*) MAP_FAILED) ? NULL :
                (unsigned char*)mappedAddr;

            if (zip->maddr == NULL) {
                jio_fprintf(stderr, "mmap failed for CEN and END part of zip file\n");
                goto Catch;
            }
        }
        cenbuf = zip->maddr + cenpos - offset;
    } else
#endif
    {
        /* NOTE(review): cenlen is narrowed to size_t for malloc but passed
         * unnarrowed to readFullyAt -- confirm this cannot truncate on
         * targets where size_t is narrower than jlong. */
        if ((cenbuf = malloc((size_t) cenlen)) == NULL ||
            (readFullyAt(zip->zfd, cenbuf, cenlen, cenpos) == -1))
            goto Catch;
    }

    cenend = cenbuf + cenlen;

    /* Initialize zip file data structures based on the total number
     * of central directory entries as stored in ENDTOT.  Since this
     * is a 2-byte field, but we (and other zip implementations)
     * support approx. 2**31 entries, we do not trust ENDTOT, but
     * treat it only as a strong hint.  When we call ourselves
     * recursively, knownTotal will have the "true" value.
     *
     * Keep this path alive even with the Zip64 END support added, just
     * for zip files that have more than 0xffff entries but don't have
     * the Zip64 enabled.
     */
    total = (knownTotal != -1) ? knownTotal : total;
    entries  = zip->entries  = calloc(total, sizeof(entries[0]));
    tablelen = zip->tablelen = ((total/2) | 1); // Odd -> fewer collisions
    table    = zip->table    = malloc(tablelen * sizeof(table[0]));
    /* calloc() is allowed to return NULL for a zero-element request, so a
     * NULL 'entries' is only an allocation failure when total != 0.
     * 'tablelen' always has its low bit set, so it is never zero and
     * 'table' needs no such exemption. */
    if ((entries == NULL && total != 0) || table == NULL) goto Catch;
    for (j = 0; j < tablelen; j++)
        table[j] = ZIP_ENDCHAIN;

    /* Iterate through the entries in the central directory */
    for (i = 0, cp = cenbuf; cp <= cenend - CENHDR; i++, cp += CENSIZE(cp)) {
        /* Following are unsigned 16-bit */
        jint method, nlen;
        unsigned int hsh;

        if (i >= total) {
            /* This will only happen if the zip file has an incorrect
             * ENDTOT field, which usually means it contains more than
             * 65535 entries. */
            cenpos = readCEN(zip, countCENHeaders(cenbuf, cenend));
            goto Finally;
        }

        method = CENHOW(cp);
        nlen   = CENNAM(cp);

        if (GETSIG(cp) != CENSIG)
            ZIP_FORMAT_ERROR("invalid CEN header (bad signature)");
        if (CENFLG(cp) & 1)
            ZIP_FORMAT_ERROR("invalid CEN header (encrypted entry)");
        if (method != STORED && method != DEFLATED)
            ZIP_FORMAT_ERROR("invalid CEN header (bad compression method)");
        if (cp + CENHDR + nlen > cenend)
            ZIP_FORMAT_ERROR("invalid CEN header (bad header size)");

        /* if the entry is metadata add it to our metadata names */
        if (isMetaName((char *)cp+CENHDR, nlen))
            if (addMetaName(zip, (char *)cp+CENHDR, nlen) != 0)
                goto Catch;

        /* Record the CEN offset and the name hash in our hash cell. */
        entries[i].cenpos = cenpos + (cp - cenbuf);
        entries[i].hash = hashN((char *)cp+CENHDR, nlen);

        /* Add the entry to the hash table */
        hsh = entries[i].hash % tablelen;
        entries[i].next = table[hsh];
        table[hsh] = i;
    }
    /* The walk must land exactly on the end of the directory. */
    if (cp != cenend)
        ZIP_FORMAT_ERROR("invalid CEN header (bad header size)");

    zip->total = i;
    goto Finally;

 Catch:
    freeCEN(zip);
    cenpos = -1;

 Finally:
    /* cenbuf is heap-allocated only on the non-mmap path; in the mmap
     * case it points into zip->maddr and must not be freed here. */
#ifdef USE_MMAP
    if (!zip->usemmap)
#endif
        free(cenbuf);

    return cenpos;
}
/*
 * Searches backwards from the end of the buffered file for the zip END
 * header (signature 'P' 'K' 5 6).
 *
 * A candidate is accepted when its position plus ENDHDRSIZ plus its
 * comment length (ENDCOM) equals the file length. For an accepted
 * candidate the central directory offset and first local header offset
 * are derived from ENDSIZ / ENDOFF, and, if the bytes at the local header
 * offset read back as 'P' 'K' 3 4, they are stored in raw_current_entry().
 *
 * Returns true as soon as a candidate passes the length check; returns
 * false if a seek or read fails or the search reaches minOffset without
 * finding one.
 */
bool JarFileParser::find_end_of_central_header() {
  DECLARE_STATIC_BUFFER(unsigned char, buffer, TMPBUFFERSIZE);
  BufferedFile::Raw bf = buffered_file();

  /* Get the length of the file */
  const jint length = (int) bf().file_size();

  /* Calculate the smallest possible offset for the end header.  It
   * can be at most 0xFFFF + ENDHDRSIZ bytes from the end of the file, but
   * the file must also have a local header and a central header
   */
  jint minOffset = length - (0xFFFF + ENDHDRSIZ);
  if (minOffset < LOCHDRSIZ + CENHDRSIZ) {
    minOffset = LOCHDRSIZ + CENHDRSIZ;
  }

  /* We assume that "buffer" contains the contents
   * of part of the file. currentOffset contains the offset of buffer[0].
   */

  /* Read in the last ENDHDRSIZ bytes into the buffer.  99% of the time,
   * the file won't have a comment, and this is the only read we'll need */
  if ( (bf().seek(-ENDHDRSIZ, SEEK_END) < 0)
    || (bf().get_bytes(buffer, ENDHDRSIZ) != ENDHDRSIZ)) {
    return false;
  }
  /* Set currentOffset to be the offset of buffer[0] */
  jint currentOffset = length - ENDHDRSIZ;
  /* Set bp to be the location at which to start looking */
  unsigned const char* bp = buffer;

  for (;;) {
    /* "buffer" contains a block of data from the file, starting at
     * currentOffset "position" in the file.
     * We investigate whether   currentOffset + (bp - buffer)  is the start
     * of the end header in the zip file.
     *
     * We use a simplified version of Knuth-Morris-Pratt search algorithm.
     * The header we're looking for is 'P' 'K' 5  6
     *
     * Each case steps bp backwards by the distance at which a signature
     * could still start, given the byte currently under bp.
     */
    switch(bp[0]) {
    case '\006':   /* The header must start at least 3 bytes back */
      bp -= 3; break;
    case '\005':   /* The header must start at least 2 bytes back  */
      bp -= 2; break;
    case 'K':      /* The header must start at least 1 byte back  */
      bp -= 1; break;
    case 'P':      /* Either this is the header, or the header must
                    * start at least 4  back */
      if (bp[1] == 'K' && bp[2] == 5 && bp[3] == 6) {
        /* We have what may be a header.  Let's make sure the
         * implied length of the jar file matches the actual
         * length.
         */
        int endpos = (int) currentOffset + (bp - buffer);
        if (endpos + ENDHDRSIZ + ENDCOM(bp) == length) {
          /* The central directory ends where the END header begins, and
           * the first local header precedes it by ENDOFF bytes. */
          juint cenOffset = endpos - ENDSIZ(bp);
          juint locOffset = cenOffset - ENDOFF(bp);
          unsigned char sig[4];

          /* Only record the offsets if a local file header signature
           * ('P' 'K' 3 4) is actually found at locOffset. */
          if (bf().seek(locOffset, SEEK_SET) >= 0 &&
              bf().get_bytes(sig, 4) == 4 &&
              sig[0] == (unsigned char)'P' && 
              sig[1] == (unsigned char)'K' && 
              sig[2] == (unsigned char) 3  && 
              sig[3] == (unsigned char) 4) {

            raw_current_entry()->cenOffset = cenOffset;
            raw_current_entry()->nextCenOffset = cenOffset;
            raw_current_entry()->locOffset = locOffset;
#if ENABLE_ROM_GENERATOR
            raw_current_entry()->totalEntryCount = ENDTOT(bp);
#endif
          }
          /* NOTE(review): this returns true even when the signature check
           * above failed, in which case none of the entry offsets were
           * written here -- confirm that callers expect this. */
          return true; // Found central header
        }
      }
      /* FALL THROUGH */
    default:
      /* The header must start at least four characters back, since
       * the current character isn't in the header */
      bp -= 4;
    }
    if (bp < buffer) {
      /* We've moved outside our window into the file.  We must
       * move the window backwards */
      size_t count = (size_t) (currentOffset - minOffset); /* Bytes left in file */
      if (((jint)count) <= 0) {
        /* Nothing left to read.  Time to give up */
        return false;
      } else {
        /* up to ((bp - buffer) + ENDHDRSIZ) bytes in the buffer might
         * still be part of the end header, so the most bytes we can
         * actually read are
         *      TMPBUFFERSIZE - ((bp - buffer) + ENDHDRSIZ).
         */
        size_t available = (TMPBUFFERSIZE - ENDHDRSIZ) + (buffer - bp);
        if (count > available) {
          count = available;
        }
      }
      /* Back up, while keeping our virtual currentOffset the same */
      currentOffset -= count;
      bp += count;
      /* Slide the existing bytes up by count, then fill the freed space
       * at the front of the buffer with the preceding count bytes of the
       * file. */
      jvm_memmove(buffer + count, buffer, TMPBUFFERSIZE - count);
      if ( bf().seek(currentOffset, SEEK_SET) < 0 ||
           bf().get_bytes(buffer, count) != size_t(count) ) {
        return false;
      }
    }
  } /* end of for loop */
}