Пример #1
0
/*
   read_directory(archive) -> files dict (new reference)

   Given a path to a Zip archive, build a dict, mapping file names
   (local to the archive, using SEP as a separator) to toc entries.

   A toc_entry is a tuple:

       (__file__,      # value to use for __file__, available for all files
    compress,      # compression kind; 0 for uncompressed
    data_size,     # size of compressed data on disk
    file_size,     # size of decompressed data
    file_offset,   # offset of file header from start of archive
    time,          # mod time of file (in dos format)
    date,          # mod data of file (in dos format)
    crc,           # crc checksum of the data
       )

   Directories can be recognized by the trailing SEP in the name,
   data_size and file_offset are 0.
*/
static PyObject *
read_directory(char *archive)
{
    PyObject *files = NULL;
    FILE *fp;
    long compress, crc, data_size, file_size, file_offset, date, time;
    long header_offset, name_size, header_size, header_position;
    long i, l, count;
    size_t length;
    char path[MAXPATHLEN + 5];
    char name[MAXPATHLEN + 5];
    char *p, endof_central_dir[22];
    long arc_offset; /* offset from beginning of file to start of zip-archive */

    if (strlen(archive) > MAXPATHLEN) {
        PyErr_SetString(PyExc_OverflowError,
                        "Zip path name is too long");
        return NULL;
    }
    strcpy(path, archive);

    fp = fopen(archive, "rb");
    if (fp == NULL) {
        PyErr_Format(ZipImportError, "can't open Zip file: "
                     "'%.200s'", archive);
        return NULL;
    }
    fseek(fp, -22, SEEK_END);
    header_position = ftell(fp);
    if (fread(endof_central_dir, 1, 22, fp) != 22) {
        fclose(fp);
        PyErr_Format(ZipImportError, "can't read Zip file: "
                     "'%.200s'", archive);
        return NULL;
    }
    if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
        /* Bad: End of Central Dir signature */
        fclose(fp);
        PyErr_Format(ZipImportError, "not a Zip file: "
                     "'%.200s'", archive);
        return NULL;
    }

    header_size = get_long((unsigned char *)endof_central_dir + 12);
    header_offset = get_long((unsigned char *)endof_central_dir + 16);
    arc_offset = header_position - header_offset - header_size;
    header_offset += arc_offset;

    files = PyDict_New();
    if (files == NULL)
        goto error;

    length = (long)strlen(path);
    path[length] = SEP;

    /* Start of Central Directory */
    count = 0;
    for (;;) {
        PyObject *t;
        int err;

        fseek(fp, header_offset, 0);  /* Start of file header */
        l = PyMarshal_ReadLongFromFile(fp);
        if (l != 0x02014B50)
            break;              /* Bad: Central Dir File Header */
        fseek(fp, header_offset + 10, 0);
        compress = PyMarshal_ReadShortFromFile(fp);
        time = PyMarshal_ReadShortFromFile(fp);
        date = PyMarshal_ReadShortFromFile(fp);
        crc = PyMarshal_ReadLongFromFile(fp);
        data_size = PyMarshal_ReadLongFromFile(fp);
        file_size = PyMarshal_ReadLongFromFile(fp);
        name_size = PyMarshal_ReadShortFromFile(fp);
        header_size = 46 + name_size +
           PyMarshal_ReadShortFromFile(fp) +
           PyMarshal_ReadShortFromFile(fp);
        fseek(fp, header_offset + 42, 0);
        file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
        if (name_size > MAXPATHLEN)
            name_size = MAXPATHLEN;

        p = name;
        for (i = 0; i < name_size; i++) {
            *p = (char)getc(fp);
            if (*p == '/')
                *p = SEP;
            p++;
        }
        *p = 0;         /* Add terminating null byte */
        header_offset += header_size;

        strncpy(path + length + 1, name, MAXPATHLEN - length - 1);

        t = Py_BuildValue("siiiiiii", path, compress, data_size,
                          file_size, file_offset, time, date, crc);
        if (t == NULL)
            goto error;
        err = PyDict_SetItemString(files, name, t);
        Py_DECREF(t);
        if (err != 0)
            goto error;
        count++;
    }
    fclose(fp);
    if (Py_VerboseFlag)
        PySys_WriteStderr("# zipimport: found %ld names in %s\n",
            count, archive);
    return files;
error:
    fclose(fp);
    Py_XDECREF(files);
    return NULL;
}
Пример #2
0
/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
   data as a new reference. */
static PyObject *
get_data(char *archive, PyObject *toc_entry)
{
    PyObject *raw_data, *data = NULL, *decompress;
    char *buf;
    FILE *fp;
    int err;
    Py_ssize_t bytes_read = 0;
    long l;
    char *datapath;
    long compress, data_size, file_size, file_offset;
    long time, date, crc;

    if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
                          &data_size, &file_size, &file_offset, &time,
                          &date, &crc)) {
        return NULL;
    }

    fp = fopen(archive, "rb");
    if (!fp) {
        PyErr_Format(PyExc_IOError,
           "zipimport: can not open file %s", archive);
        return NULL;
    }

    /* Check to make sure the local file header is correct */
    fseek(fp, file_offset, 0);
    l = PyMarshal_ReadLongFromFile(fp);
    if (l != 0x04034B50) {
        /* Bad: Local File Header */
        PyErr_Format(ZipImportError,
                     "bad local file header in %s",
                     archive);
        fclose(fp);
        return NULL;
    }
    fseek(fp, file_offset + 26, 0);
    l = 30 + PyMarshal_ReadShortFromFile(fp) +
        PyMarshal_ReadShortFromFile(fp);        /* local header size */
    file_offset += l;           /* Start of file data */

    raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
                                          data_size : data_size + 1);
    if (raw_data == NULL) {
        fclose(fp);
        return NULL;
    }
    buf = PyString_AsString(raw_data);

    err = fseek(fp, file_offset, 0);
    if (err == 0)
        bytes_read = fread(buf, 1, data_size, fp);
    fclose(fp);
    if (err || bytes_read != data_size) {
        PyErr_SetString(PyExc_IOError,
                        "zipimport: can't read data");
        Py_DECREF(raw_data);
        return NULL;
    }

    if (compress != 0) {
        buf[data_size] = 'Z';  /* saw this in zipfile.py */
        data_size++;
    }
    buf[data_size] = '\0';

    if (compress == 0)  /* data is not compressed */
        return raw_data;

    /* Decompress with zlib */
    decompress = get_decompress_func();
    if (decompress == NULL) {
        PyErr_SetString(ZipImportError,
                        "can't decompress data; "
                        "zlib not available");
        goto error;
    }
    data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
error:
    Py_DECREF(raw_data);
    return data;
}
Пример #3
0
/*
   read_directory(archive) -> files dict (new reference)

   Given a path to a Zip archive, build a dict, mapping file names
   (local to the archive, using SEP as a separator) to toc entries.

   A toc_entry is a tuple:

   (__file__,      # value to use for __file__, available for all files,
                   # encoded to the filesystem encoding
    compress,      # compression kind; 0 for uncompressed
    data_size,     # size of compressed data on disk
    file_size,     # size of decompressed data
    file_offset,   # offset of file header from start of archive
    time,          # mod time of file (in dos format)
    date,          # mod data of file (in dos format)
    crc,           # crc checksum of the data
   )

   Directories can be recognized by the trailing SEP in the name,
   data_size and file_offset are 0.
*/
static PyObject *
read_directory(PyObject *archive)
{
    PyObject *files = NULL;
    FILE *fp;
    unsigned short flags;
    short compress, time, date, name_size;
    long crc, data_size, file_size, header_size;
    Py_ssize_t file_offset, header_position, header_offset;
    long l, count;
    Py_ssize_t i;
    char name[MAXPATHLEN + 5];
    PyObject *nameobj = NULL;
    char *p, endof_central_dir[22];
    Py_ssize_t arc_offset;  /* Absolute offset to start of the zip-archive. */
    PyObject *path;
    const char *charset;
    int bootstrap;

    fp = _Py_fopen(archive, "rb");
    if (fp == NULL) {
        if (!PyErr_Occurred())
            PyErr_Format(ZipImportError, "can't open Zip file: %R", archive);
        return NULL;
    }
    fseek(fp, -22, SEEK_END);
    header_position = ftell(fp);
    if (fread(endof_central_dir, 1, 22, fp) != 22) {
        fclose(fp);
        PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
        return NULL;
    }
    if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
        /* Bad: End of Central Dir signature */
        fclose(fp);
        PyErr_Format(ZipImportError, "not a Zip file: %R", archive);
        return NULL;
    }

    header_size = get_long((unsigned char *)endof_central_dir + 12);
    header_offset = get_long((unsigned char *)endof_central_dir + 16);
    arc_offset = header_position - header_offset - header_size;
    header_offset += arc_offset;

    files = PyDict_New();
    if (files == NULL)
        goto error;

    /* Start of Central Directory */
    count = 0;
    for (;;) {
        PyObject *t;
        int err;

        fseek(fp, header_offset, 0);  /* Start of file header */
        l = PyMarshal_ReadLongFromFile(fp);
        if (l != 0x02014B50)
            break;              /* Bad: Central Dir File Header */
        fseek(fp, header_offset + 8, 0);
        flags = (unsigned short)PyMarshal_ReadShortFromFile(fp);
        compress = PyMarshal_ReadShortFromFile(fp);
        time = PyMarshal_ReadShortFromFile(fp);
        date = PyMarshal_ReadShortFromFile(fp);
        crc = PyMarshal_ReadLongFromFile(fp);
        data_size = PyMarshal_ReadLongFromFile(fp);
        file_size = PyMarshal_ReadLongFromFile(fp);
        name_size = PyMarshal_ReadShortFromFile(fp);
        header_size = 46 + name_size +
           PyMarshal_ReadShortFromFile(fp) +
           PyMarshal_ReadShortFromFile(fp);
        fseek(fp, header_offset + 42, 0);
        file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
        if (name_size > MAXPATHLEN)
            name_size = MAXPATHLEN;

        p = name;
        for (i = 0; i < (Py_ssize_t)name_size; i++) {
            *p = (char)getc(fp);
            if (*p == '/')
                *p = SEP;
            p++;
        }
        *p = 0;         /* Add terminating null byte */
        header_offset += header_size;

        bootstrap = 0;
        if (flags & 0x0800)
            charset = "utf-8";
        else if (!PyThreadState_GET()->interp->codecs_initialized) {
            /* During bootstrap, we may need to load the encodings
               package from a ZIP file. But the cp437 encoding is implemented
               in Python in the encodings package.

               Break out of this dependency by assuming that the path to
               the encodings module is ASCII-only. */
            charset = "ascii";
            bootstrap = 1;
        }
        else
            charset = "cp437";
        nameobj = PyUnicode_Decode(name, name_size, charset, NULL);
        if (PyUnicode_READY(nameobj) == -1)
            goto error;
        if (nameobj == NULL) {
            if (bootstrap)
                PyErr_Format(PyExc_NotImplementedError,
                    "bootstrap issue: python%i%i.zip contains non-ASCII "
                    "filenames without the unicode flag",
                    PY_MAJOR_VERSION, PY_MINOR_VERSION);
            goto error;
        }
        path = PyUnicode_FromFormat("%U%c%U", archive, SEP, nameobj);
        if (path == NULL)
            goto error;
        t = Py_BuildValue("Nhllnhhl", path, compress, data_size,
                          file_size, file_offset, time, date, crc);
        if (t == NULL)
            goto error;
        err = PyDict_SetItem(files, nameobj, t);
        Py_CLEAR(nameobj);
        Py_DECREF(t);
        if (err != 0)
            goto error;
        count++;
    }
    fclose(fp);
    if (Py_VerboseFlag)
        PySys_FormatStderr("# zipimport: found %ld names in %R\n",
                           count, archive);
    return files;
error:
    fclose(fp);
    Py_XDECREF(files);
    Py_XDECREF(nameobj);
    return NULL;
}