Ejemplo n.º 1
0
/* New reference. */
static PyObject *
normalize_module(PyObject *filename)
{
    PyObject *module;
    int kind;
    void *data;
    Py_ssize_t len;

    len = PyUnicode_GetLength(filename);
    if (len < 0)
        return NULL;

    if (len == 0)
        return PyUnicode_FromString("<unknown>");

    kind = PyUnicode_KIND(filename);
    data = PyUnicode_DATA(filename);

    /* if filename.endswith(".py"): */
    if (len >= 3 &&
        PyUnicode_READ(kind, data, len-3) == '.' &&
        PyUnicode_READ(kind, data, len-2) == 'p' &&
        PyUnicode_READ(kind, data, len-1) == 'y')
    {
        module = PyUnicode_Substring(filename, 0, len-3);
    }
    else {
        module = filename;
        Py_INCREF(module);
    }
    return module;
}
Ejemplo n.º 2
0
static PyObject *
Reader_iternext(ReaderObj *self)
{
    PyObject *fields = NULL;
    Py_UCS4 c;
    Py_ssize_t pos, linelen;
    unsigned int kind;
    void *data;
    PyObject *lineobj;

    if (parse_reset(self) < 0)
        return NULL;
    do {
        lineobj = PyIter_Next(self->input_iter);
        if (lineobj == NULL) {
            /* End of input OR exception */
            if (!PyErr_Occurred() && self->field_len != 0)
                PyErr_Format(error_obj,
                             "newline inside string");
            return NULL;
        }
        if (!PyUnicode_Check(lineobj)) {
            PyErr_Format(error_obj,
                         "iterator should return strings, "
                         "not %.200s "
                         "(did you open the file in text mode?)",
                         lineobj->ob_type->tp_name
                );
            Py_DECREF(lineobj);
            return NULL;
        }
        ++self->line_num;
        kind = PyUnicode_KIND(lineobj);
        data = PyUnicode_DATA(lineobj);
        pos = 0;
        linelen = PyUnicode_GET_LENGTH(lineobj);
        while (linelen--) {
            c = PyUnicode_READ(kind, data, pos);
            if (c == '\0') {
                Py_DECREF(lineobj);
                PyErr_Format(error_obj,
                             "line contains NULL byte");
                goto err;
            }
            if (parse_process_char(self, c) < 0) {
                Py_DECREF(lineobj);
                goto err;
            }
            pos++;
        }
        Py_DECREF(lineobj);
        if (parse_process_char(self, 0) < 0)
            goto err;
    } while (self->state != START_RECORD);

    fields = self->fields;
    self->fields = NULL;
err:
    return fields;
}
Ejemplo n.º 3
0
/*
    Internal function to read the codepoint at the given index from the input.
*/
static Unicode read_codepoint(TokenizerInput* text, Py_ssize_t index)
{
#ifdef PEP_393
    return PyUnicode_READ(text->kind, text->data, index);
#else
    return text->buf[index];
#endif
}
Ejemplo n.º 4
0
static int
join_append_lineterminator(WriterObj *self)
{
    Py_ssize_t terminator_len, i;
    unsigned int term_kind;
    void *term_data;

    terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
    if (terminator_len == -1)
        return 0;

    /* grow record buffer if necessary */
    if (!join_check_rec_size(self, self->rec_len + terminator_len))
        return 0;

    term_kind = PyUnicode_KIND(self->dialect->lineterminator);
    term_data = PyUnicode_DATA(self->dialect->lineterminator);
    for (i = 0; i < terminator_len; i++)
        self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
    self->rec_len += terminator_len;

    return 1;
}
Ejemplo n.º 5
0
/* Returns 0 on error (no new refs), 1 on success */
static int
setup_context(Py_ssize_t stack_level, PyObject **filename, int *lineno,
              PyObject **module, PyObject **registry)
{
    PyObject *globals;

    /* Setup globals and lineno. */
    PyFrameObject *f = PyThreadState_GET()->frame;
    // Stack level comparisons to Python code is off by one as there is no
    // warnings-related stack level to avoid.
    if (stack_level <= 0 || is_internal_frame(f)) {
        while (--stack_level > 0 && f != NULL) {
            f = f->f_back;
        }
    }
    else {
        while (--stack_level > 0 && f != NULL) {
            f = next_external_frame(f);
        }
    }

    if (f == NULL) {
        globals = PyThreadState_Get()->interp->sysdict;
        *lineno = 1;
    }
    else {
        globals = f->f_globals;
        *lineno = PyFrame_GetLineNumber(f);
    }

    *module = NULL;

    /* Setup registry. */
    assert(globals != NULL);
    assert(PyDict_Check(globals));
    *registry = PyDict_GetItemString(globals, "__warningregistry__");
    if (*registry == NULL) {
        int rc;

        *registry = PyDict_New();
        if (*registry == NULL)
            return 0;

         rc = PyDict_SetItemString(globals, "__warningregistry__", *registry);
         if (rc < 0)
            goto handle_error;
    }
    else
        Py_INCREF(*registry);

    /* Setup module. */
    *module = PyDict_GetItemString(globals, "__name__");
    if (*module == NULL) {
        *module = PyUnicode_FromString("<string>");
        if (*module == NULL)
            goto handle_error;
    }
    else
        Py_INCREF(*module);

    /* Setup filename. */
    *filename = PyDict_GetItemString(globals, "__file__");
    if (*filename != NULL && PyUnicode_Check(*filename)) {
        Py_ssize_t len;
        int kind;
        void *data;

        if (PyUnicode_READY(*filename))
            goto handle_error;

        len = PyUnicode_GetLength(*filename);
        kind = PyUnicode_KIND(*filename);
        data = PyUnicode_DATA(*filename);

#define ascii_lower(c) ((c <= 127) ? Py_TOLOWER(c) : 0)
        /* if filename.lower().endswith(".pyc"): */
        if (len >= 4 &&
            PyUnicode_READ(kind, data, len-4) == '.' &&
            ascii_lower(PyUnicode_READ(kind, data, len-3)) == 'p' &&
            ascii_lower(PyUnicode_READ(kind, data, len-2)) == 'y' &&
            ascii_lower(PyUnicode_READ(kind, data, len-1)) == 'c')
        {
            *filename = PyUnicode_Substring(*filename, 0,
                                            PyUnicode_GET_LENGTH(*filename)-1);
            if (*filename == NULL)
                goto handle_error;
        }
        else
            Py_INCREF(*filename);
    }
    else {
        *filename = NULL;
        if (*module != Py_None && PyUnicode_CompareWithASCIIString(*module, "__main__") == 0) {
            PyObject *argv = _PySys_GetObjectId(&PyId_argv);
            /* PyList_Check() is needed because sys.argv is set to None during
               Python finalization */
            if (argv != NULL && PyList_Check(argv) && PyList_Size(argv) > 0) {
                int is_true;
                *filename = PyList_GetItem(argv, 0);
                Py_INCREF(*filename);
                /* If sys.argv[0] is false, then use '__main__'. */
                is_true = PyObject_IsTrue(*filename);
                if (is_true < 0) {
                    Py_DECREF(*filename);
                    goto handle_error;
                }
                else if (!is_true) {
                    Py_XSETREF(*filename, PyUnicode_FromString("__main__"));
                    if (*filename == NULL)
                        goto handle_error;
                }
            }
            else {
                /* embedded interpreters don't have sys.argv, see bug #839151 */
                *filename = PyUnicode_FromString("__main__");
                if (*filename == NULL)
                    goto handle_error;
            }
        }
        if (*filename == NULL) {
            *filename = *module;
            Py_INCREF(*filename);
        }
    }

    return 1;

 handle_error:
    /* filename not XDECREF'ed here as there is no way to jump here with a
       dangling reference. */
    Py_XDECREF(*registry);
    Py_XDECREF(*module);
    return 0;
}
Ejemplo n.º 6
0
static void
show_warning(PyObject *filename, int lineno, PyObject *text,
             PyObject *category, PyObject *sourceline)
{
    PyObject *f_stderr;
    PyObject *name;
    char lineno_str[128];
    _Py_IDENTIFIER(__name__);

    PyOS_snprintf(lineno_str, sizeof(lineno_str), ":%d: ", lineno);

    name = _PyObject_GetAttrId(category, &PyId___name__);
    if (name == NULL)  /* XXX Can an object lack a '__name__' attribute? */
        goto error;

    f_stderr = _PySys_GetObjectId(&PyId_stderr);
    if (f_stderr == NULL) {
        fprintf(stderr, "lost sys.stderr\n");
        goto error;
    }

    /* Print "filename:lineno: category: text\n" */
    if (PyFile_WriteObject(filename, f_stderr, Py_PRINT_RAW) < 0)
        goto error;
    if (PyFile_WriteString(lineno_str, f_stderr) < 0)
        goto error;
    if (PyFile_WriteObject(name, f_stderr, Py_PRINT_RAW) < 0)
        goto error;
    if (PyFile_WriteString(": ", f_stderr) < 0)
        goto error;
    if (PyFile_WriteObject(text, f_stderr, Py_PRINT_RAW) < 0)
        goto error;
    if (PyFile_WriteString("\n", f_stderr) < 0)
        goto error;
    Py_CLEAR(name);

    /* Print "  source_line\n" */
    if (sourceline) {
        int kind;
        void *data;
        Py_ssize_t i, len;
        Py_UCS4 ch;
        PyObject *truncated;

        if (PyUnicode_READY(sourceline) < 1)
            goto error;

        kind = PyUnicode_KIND(sourceline);
        data = PyUnicode_DATA(sourceline);
        len = PyUnicode_GET_LENGTH(sourceline);
        for (i=0; i<len; i++) {
            ch = PyUnicode_READ(kind, data, i);
            if (ch != ' ' && ch != '\t' && ch != '\014')
                break;
        }

        truncated = PyUnicode_Substring(sourceline, i, len);
        if (truncated == NULL)
            goto error;

        PyFile_WriteObject(sourceline, f_stderr, Py_PRINT_RAW);
        Py_DECREF(truncated);
        PyFile_WriteString("\n", f_stderr);
    }
    else {
        _Py_DisplaySourceLine(f_stderr, filename, lineno, 2);
    }

error:
    Py_XDECREF(name);
    PyErr_Clear();
}
Ejemplo n.º 7
0
/* Fill in the digit parts of a numbers's string representation,
   as determined in calc_number_widths().
   Return -1 on error, or 0 on success. */
static int
fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
            PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
            PyObject *prefix, Py_ssize_t p_start,
            Py_UCS4 fill_char,
            LocaleInfo *locale, int toupper)
{
    /* Used to keep track of digits, decimal, and remainder. */
    Py_ssize_t d_pos = d_start;
    const unsigned int kind = writer->kind;
    const void *data = writer->data;
    Py_ssize_t r;

    if (spec->n_lpadding) {
        _PyUnicode_FastFill(writer->buffer,
                            writer->pos, spec->n_lpadding, fill_char);
        writer->pos += spec->n_lpadding;
    }
    if (spec->n_sign == 1) {
        PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
        writer->pos++;
    }
    if (spec->n_prefix) {
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
                                      prefix, p_start,
                                      spec->n_prefix);
        if (toupper) {
            Py_ssize_t t;
            for (t = 0; t < spec->n_prefix; t++) {
                Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
                c = Py_TOUPPER(c);
                assert (c <= 127);
                PyUnicode_WRITE(kind, data, writer->pos + t, c);
            }
        }
        writer->pos += spec->n_prefix;
    }
    if (spec->n_spadding) {
        _PyUnicode_FastFill(writer->buffer,
                            writer->pos, spec->n_spadding, fill_char);
        writer->pos += spec->n_spadding;
    }

    /* Only for type 'c' special case, it has no digits. */
    if (spec->n_digits != 0) {
        /* Fill the digits with InsertThousandsGrouping. */
        char *pdigits;
        if (PyUnicode_READY(digits))
            return -1;
        pdigits = PyUnicode_DATA(digits);
        if (PyUnicode_KIND(digits) < kind) {
            pdigits = _PyUnicode_AsKind(digits, kind);
            if (pdigits == NULL)
                return -1;
        }
        r = _PyUnicode_InsertThousandsGrouping(
                writer->buffer, writer->pos,
                spec->n_grouped_digits,
                pdigits + kind * d_pos,
                spec->n_digits, spec->n_min_width,
                locale->grouping, locale->thousands_sep, NULL);
        if (r == -1)
            return -1;
        assert(r == spec->n_grouped_digits);
        if (PyUnicode_KIND(digits) < kind)
            PyMem_Free(pdigits);
        d_pos += spec->n_digits;
    }
    if (toupper) {
        Py_ssize_t t;
        for (t = 0; t < spec->n_grouped_digits; t++) {
            Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
            c = Py_TOUPPER(c);
            if (c > 127) {
                PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
                return -1;
            }
            PyUnicode_WRITE(kind, data, writer->pos + t, c);
        }
    }
    writer->pos += spec->n_grouped_digits;

    if (spec->n_decimal) {
        _PyUnicode_FastCopyCharacters(
            writer->buffer, writer->pos,
            locale->decimal_point, 0, spec->n_decimal);
        writer->pos += spec->n_decimal;
        d_pos += 1;
    }

    if (spec->n_remainder) {
        _PyUnicode_FastCopyCharacters(
            writer->buffer, writer->pos,
            digits, d_pos, spec->n_remainder);
        writer->pos += spec->n_remainder;
        /* d_pos += spec->n_remainder; */
    }

    if (spec->n_rpadding) {
        _PyUnicode_FastFill(writer->buffer,
                            writer->pos, spec->n_rpadding,
                            fill_char);
        writer->pos += spec->n_rpadding;
    }
    return 0;
}
Ejemplo n.º 8
0
/* Calculate new record length or append field to record.  Return new
 * record length.
 */
static Py_ssize_t
join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
                 Py_ssize_t field_len, int quote_empty, int *quoted,
                 int copy_phase)
{
    DialectObj *dialect = self->dialect;
    int i;
    Py_ssize_t rec_len;

#define ADDCH(c) \
    do {\
        if (copy_phase) \
            self->rec[rec_len] = c;\
        rec_len++;\
    } while(0)

    rec_len = self->rec_len;

    /* If this is not the first field we need a field separator */
    if (self->num_fields > 0)
        ADDCH(dialect->delimiter);

    /* Handle preceding quote */
    if (copy_phase && *quoted)
        ADDCH(dialect->quotechar);

    /* Copy/count field data */
    /* If field is null just pass over */
    for (i = 0; field_data && (i < field_len); i++) {
        Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
        int want_escape = 0;

        if (c == dialect->delimiter ||
            c == dialect->escapechar ||
            c == dialect->quotechar  ||
            PyUnicode_FindChar(
                dialect->lineterminator, c, 0,
                PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
            if (dialect->quoting == QUOTE_NONE)
                want_escape = 1;
            else {
                if (c == dialect->quotechar) {
                    if (dialect->doublequote)
                        ADDCH(dialect->quotechar);
                    else
                        want_escape = 1;
                }
                if (!want_escape)
                    *quoted = 1;
            }
            if (want_escape) {
                if (!dialect->escapechar) {
                    PyErr_Format(error_obj,
                                 "need to escape, but no escapechar set");
                    return -1;
                }
                ADDCH(dialect->escapechar);
            }
        }
        /* Copy field character into record buffer.
         */
        ADDCH(c);
    }

    /* If field is empty check if it needs to be quoted.
     */
    if (i == 0 && quote_empty) {
        if (dialect->quoting == QUOTE_NONE) {
            PyErr_Format(error_obj,
                "single empty field record must be quoted");
            return -1;
        }
        else
            *quoted = 1;
    }

    if (*quoted) {
        if (copy_phase)
            ADDCH(dialect->quotechar);
        else
            rec_len += 2;
    }
    return rec_len;
#undef ADDCH
}
Ejemplo n.º 9
0
static void
dump_ascii(int fd, PyObject *text)
{
    PyASCIIObject *ascii = (PyASCIIObject *)text;
    Py_ssize_t i, size;
    int truncated;
    int kind;
    void *data = NULL;
    wchar_t *wstr = NULL;
    Py_UCS4 ch;

    size = ascii->length;
    kind = ascii->state.kind;
    if (ascii->state.compact) {
        if (ascii->state.ascii)
            data = ((PyASCIIObject*)text) + 1;
        else
            data = ((PyCompactUnicodeObject*)text) + 1;
    }
    else if (kind != PyUnicode_WCHAR_KIND) {
        data = ((PyUnicodeObject *)text)->data.any;
        if (data == NULL)
            return;
    }
    else {
        wstr = ((PyASCIIObject *)text)->wstr;
        if (wstr == NULL)
            return;
        size = ((PyCompactUnicodeObject *)text)->wstr_length;
    }

    if (MAX_STRING_LENGTH < size) {
        size = MAX_STRING_LENGTH;
        truncated = 1;
    }
    else
        truncated = 0;

    for (i=0; i < size; i++) {
        if (kind != PyUnicode_WCHAR_KIND)
            ch = PyUnicode_READ(kind, data, i);
        else
            ch = wstr[i];
        if (ch < 128) {
            char c = (char)ch;
            write(fd, &c, 1);
        }
        else if (ch < 0xff) {
            PUTS(fd, "\\x");
            dump_hexadecimal(fd, ch, 2);
        }
        else if (ch < 0xffff) {
            PUTS(fd, "\\u");
            dump_hexadecimal(fd, ch, 4);
        }
        else {
            PUTS(fd, "\\U");
            dump_hexadecimal(fd, ch, 8);
        }
    }
    if (truncated)
        PUTS(fd, "...");
}
Ejemplo n.º 10
0
int
_Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent)
{
    int err = 0;
    int fd;
    int i;
    char *found_encoding;
    char *encoding;
    PyObject *io;
    PyObject *binary;
    PyObject *fob = NULL;
    PyObject *lineobj = NULL;
    PyObject *res;
    char buf[MAXPATHLEN+1];
    int kind;
    void *data;

    /* open the file */
    if (filename == NULL)
        return 0;

    io = PyImport_ImportModuleNoBlock("io");
    if (io == NULL)
        return -1;
    binary = _PyObject_CallMethodId(io, &PyId_open, "Os", filename, "rb");

    if (binary == NULL) {
        PyErr_Clear();

        binary = _Py_FindSourceFile(filename, buf, sizeof(buf), io);
        if (binary == NULL) {
            Py_DECREF(io);
            return -1;
        }
    }

    /* use the right encoding to decode the file as unicode */
    fd = PyObject_AsFileDescriptor(binary);
    if (fd < 0) {
        Py_DECREF(io);
        Py_DECREF(binary);
        return 0;
    }
    found_encoding = PyTokenizer_FindEncodingFilename(fd, filename);
    if (found_encoding == NULL)
        PyErr_Clear();
    encoding = (found_encoding != NULL) ? found_encoding : "utf-8";
    /* Reset position */
    if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
        Py_DECREF(io);
        Py_DECREF(binary);
        PyMem_FREE(found_encoding);
        return 0;
    }
    fob = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "Os", binary, encoding);
    Py_DECREF(io);
    Py_DECREF(binary);
    PyMem_FREE(found_encoding);

    if (fob == NULL) {
        PyErr_Clear();
        return 0;
    }

    /* get the line number lineno */
    for (i = 0; i < lineno; i++) {
        Py_XDECREF(lineobj);
        lineobj = PyFile_GetLine(fob, -1);
        if (!lineobj) {
            err = -1;
            break;
        }
    }
    res = _PyObject_CallMethodId(fob, &PyId_close, "");
    if (res)
        Py_DECREF(res);
    else
        PyErr_Clear();
    Py_DECREF(fob);
    if (!lineobj || !PyUnicode_Check(lineobj)) {
        Py_XDECREF(lineobj);
        return err;
    }

    /* remove the indentation of the line */
    kind = PyUnicode_KIND(lineobj);
    data = PyUnicode_DATA(lineobj);
    for (i=0; i < PyUnicode_GET_LENGTH(lineobj); i++) {
        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
        if (ch != ' ' && ch != '\t' && ch != '\014')
            break;
    }
    if (i) {
        PyObject *truncated;
        truncated = PyUnicode_Substring(lineobj, i, PyUnicode_GET_LENGTH(lineobj));
        if (truncated) {
            Py_DECREF(lineobj);
            lineobj = truncated;
        } else {
            PyErr_Clear();
        }
    }

    /* Write some spaces before the line */
    strcpy(buf, "          ");
    assert (strlen(buf) == 10);
    while (indent > 0) {
        if (indent < 10)
            buf[indent] = '\0';
        err = PyFile_WriteString(buf, f);
        if (err != 0)
            break;
        indent -= 10;
    }

    /* finally display the line */
    if (err == 0)
        err = PyFile_WriteObject(lineobj, f, Py_PRINT_RAW);
    Py_DECREF(lineobj);
    if  (err == 0)
        err = PyFile_WriteString("\n", f);
    return err;
}
Ejemplo n.º 11
0
void
_Py_DumpASCII(int fd, PyObject *text)
{
    PyASCIIObject *ascii = (PyASCIIObject *)text;
    Py_ssize_t i, size;
    int truncated;
    int kind;
    void *data = NULL;
    wchar_t *wstr = NULL;
    Py_UCS4 ch;

    if (!PyUnicode_Check(text))
        return;

    size = ascii->length;
    kind = ascii->state.kind;
    if (kind == PyUnicode_WCHAR_KIND) {
        wstr = ((PyASCIIObject *)text)->wstr;
        if (wstr == NULL)
            return;
        size = ((PyCompactUnicodeObject *)text)->wstr_length;
    }
    else if (ascii->state.compact) {
        if (ascii->state.ascii)
            data = ((PyASCIIObject*)text) + 1;
        else
            data = ((PyCompactUnicodeObject*)text) + 1;
    }
    else {
        data = ((PyUnicodeObject *)text)->data.any;
        if (data == NULL)
            return;
    }

    if (MAX_STRING_LENGTH < size) {
        size = MAX_STRING_LENGTH;
        truncated = 1;
    }
    else {
        truncated = 0;
    }

    for (i=0; i < size; i++) {
        if (kind != PyUnicode_WCHAR_KIND)
            ch = PyUnicode_READ(kind, data, i);
        else
            ch = wstr[i];
        if (' ' <= ch && ch <= 126) {
            /* printable ASCII character */
            char c = (char)ch;
            _Py_write_noraise(fd, &c, 1);
        }
        else if (ch <= 0xff) {
            PUTS(fd, "\\x");
            _Py_DumpHexadecimal(fd, ch, 2);
        }
        else if (ch <= 0xffff) {
            PUTS(fd, "\\u");
            _Py_DumpHexadecimal(fd, ch, 4);
        }
        else {
            PUTS(fd, "\\U");
            _Py_DumpHexadecimal(fd, ch, 8);
        }
    }
    if (truncated) {
        PUTS(fd, "...");
    }
}
Ejemplo n.º 12
0
Archivo: _csv.c Proyecto: 1st1/cpython
/* Calculate new record length or append field to record.  Return new
 * record length.
 */
static Py_ssize_t
join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
                 Py_ssize_t field_len, int *quoted,
                 int copy_phase)
{
    DialectObj *dialect = self->dialect;
    int i;
    Py_ssize_t rec_len;

#define INCLEN \
    do {\
        if (!copy_phase && rec_len == PY_SSIZE_T_MAX) {    \
            goto overflow; \
        } \
        rec_len++; \
    } while(0)

#define ADDCH(c)                                \
    do {\
        if (copy_phase) \
            self->rec[rec_len] = c;\
        INCLEN;\
    } while(0)

    rec_len = self->rec_len;

    /* If this is not the first field we need a field separator */
    if (self->num_fields > 0)
        ADDCH(dialect->delimiter);

    /* Handle preceding quote */
    if (copy_phase && *quoted)
        ADDCH(dialect->quotechar);

    /* Copy/count field data */
    /* If field is null just pass over */
    for (i = 0; field_data && (i < field_len); i++) {
        Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
        int want_escape = 0;

        if (c == dialect->delimiter ||
            c == dialect->escapechar ||
            c == dialect->quotechar  ||
            PyUnicode_FindChar(
                dialect->lineterminator, c, 0,
                PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
            if (dialect->quoting == QUOTE_NONE)
                want_escape = 1;
            else {
                if (c == dialect->quotechar) {
                    if (dialect->doublequote)
                        ADDCH(dialect->quotechar);
                    else
                        want_escape = 1;
                }
                if (!want_escape)
                    *quoted = 1;
            }
            if (want_escape) {
                if (!dialect->escapechar) {
                    PyErr_Format(_csvstate_global->error_obj,
                                 "need to escape, but no escapechar set");
                    return -1;
                }
                ADDCH(dialect->escapechar);
            }
        }
        /* Copy field character into record buffer.
         */
        ADDCH(c);
    }

    if (*quoted) {
        if (copy_phase)
            ADDCH(dialect->quotechar);
        else {
            INCLEN; /* starting quote */
            INCLEN; /* ending quote */
        }
    }
    return rec_len;

  overflow:
    PyErr_NoMemory();
    return -1;
#undef ADDCH
#undef INCLEN
}
Ejemplo n.º 13
0
static PyObject *
parse(PyObject *self, PyObject *args, PyObject *kwargs)
{
    PyObject *string;

    if (!PyArg_ParseTuple(args, "U", &string))
        return NULL;

    if (PyUnicode_READY(string) == -1)
        return NULL;

    Py_ssize_t string_length = PyUnicode_GET_LENGTH(string);
    int        string_kind   = PyUnicode_KIND(string);
    void      *string_data   = PyUnicode_DATA(string);

    Py_UCS4 c;
    int i;

    Symbol token = {.kind = UNDECIDED, .data=NULL};
    int escape = 0;

    for (i = 0; i < string_length; i++) {
        c = PyUnicode_READ(string_kind, string_data, i);

        switch (c) {
        case 0x22: // "
            if (escape) {

            } else {

            }

            break;

        case 0x27: // '
            if (escape) {

            } else {

            }

            break;

        case 0x30:
        case 0x31:
        case 0x32:
        case 0x33:
        case 0x34:
        case 0x35:
        case 0x36:
        case 0x37:
        case 0x38:
        case 0x39:
            switch (token.kind) {
            case UNDECIDED:
                token.kind = INTEGER;
                token.data = c - 0x30;
                break;

            case INTEGER:
                token.data += (c - 0x30);
                break;

            }

        case 0x5c: // backslash
            if (escape) {
                printf("todo literal backslash\n");
                escape = 0;
            } else {
                escape = 1;
            }

            break;

        CASE__WHITE_SPACE:
            switch (token.kind) {
            case INTEGER:
            case IDENTIFIER:
                printf("whitespace new token\n");
                break;
            }

            break;

        CASE__XID_START:
            switch (token.kind) {
            case UNDECIDED:
                token.kind = IDENTIFIER;
                break;

            case INTEGER:
                printf("todo\n");
                break;
            }

            break;

        CASE__XID_CONTINUE__EXCLUDING__XID_START:
            switch (token.kind) {
            case UNDECIDED:
                printf("continue start?\n");
                break;

            case INTEGER:
                printf("continue integer\n");
                break;
            }

            break;

        default:
            printf("unclassified");
        }
    }

    PyErr_SetString(PyExc_NotImplementedError, "parse");
    return NULL;
}
Ejemplo n.º 14
0
/* define unicode version of xmlescape */
static PyObject *xmlescape_str(PyObject *str, int doquot, int doapos)
{
	Py_ssize_t oldsize;
	void *olddata;
	int maxchar = 127;
	Py_ssize_t i;
	Py_ssize_t newsize = 0;
	void *newdata;

	int kind = PyUnicode_KIND(str);

	oldsize = PyUnicode_GET_LENGTH(str);
	olddata = PyUnicode_DATA(str);
	for (i = 0; i < oldsize; ++i)
	{
		Py_UCS4 ch = PyUnicode_READ(kind, olddata, i);
		if (ch == ((Py_UCS4)'<'))
			newsize += 4; /* &lt; */
		else if (ch == (Py_UCS4)'>') /* Note that we always replace '>' with its entity, not just in case it is part of ']]>' */
			newsize += 4; /* &gt; */
		else if (ch == (Py_UCS4)'&')
			newsize += 5; /* &amp; */
		else if ((ch == (Py_UCS4)'"') && doquot)
			newsize += 6; /* &quot; */
		else if ((ch == (Py_UCS4)'\'') && doapos)
			newsize += 5; /* &#39; */
		else if (ch <= 0x8)
			newsize += 4;
		else if ((ch >= 0xB) && (ch <= 0x1F) && (ch != 0xD))
			newsize += 5;
		else if ((ch >= 0x7F) && (ch <= 0x9F) && (ch != 0x85))
			newsize += 6;
		else
		{
			newsize++;
			if (ch > maxchar)
				maxchar = ch;
		}
	}
	if (oldsize==newsize)
	{
		/* nothing to replace => return original */
		Py_INCREF(str);
		return str;
	}
	else
	{
		int index = 0;
		PyObject *result = PyUnicode_New(newsize, maxchar);
		newdata = PyUnicode_DATA(result);
		if (result == NULL)
			return NULL;
		for (i = 0; i < oldsize; ++i)
		{
			Py_UCS4 ch = PyUnicode_READ(kind, olddata, i);
			if (ch == (Py_UCS4)'<')
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, 'l');
				PyUnicode_WRITE(kind, newdata, index++, 't');
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if (ch == (Py_UCS4)'>')
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, 'g');
				PyUnicode_WRITE(kind, newdata, index++, 't');
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if (ch == (Py_UCS4)'&')
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, 'a');
				PyUnicode_WRITE(kind, newdata, index++, 'm');
				PyUnicode_WRITE(kind, newdata, index++, 'p');
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if ((ch == (Py_UCS4)'"') && doquot)
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, 'q');
				PyUnicode_WRITE(kind, newdata, index++, 'u');
				PyUnicode_WRITE(kind, newdata, index++, 'o');
				PyUnicode_WRITE(kind, newdata, index++, 't');
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if ((ch == (Py_UCS4)'\'') && doapos)
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, '#');
				PyUnicode_WRITE(kind, newdata, index++, '3');
				PyUnicode_WRITE(kind, newdata, index++, '9');
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if (ch <= 0x8)
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, '#');
				PyUnicode_WRITE(kind, newdata, index++, '0'+ch);
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if ((ch >= 0xB) && (ch <= 0x1F) && (ch != 0xD))
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, '#');
				PyUnicode_WRITE(kind, newdata, index++, '0'+ch/10);
				PyUnicode_WRITE(kind, newdata, index++, '0'+ch%10);
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if ((ch >= 0x7F) && (ch <= 0x9F) && (ch != 0x85))
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, '#');
				PyUnicode_WRITE(kind, newdata, index++, '0'+ch/100);
				PyUnicode_WRITE(kind, newdata, index++, '0'+(ch/10)%10);
				PyUnicode_WRITE(kind, newdata, index++, '0'+ch%10);
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else
				PyUnicode_WRITE(kind, newdata, index++, ch);
		}
		return result;
	}
}