Пример #1
0
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        PyObject *restuple;
        PyObject *object;
        Py_ssize_t start;
        Py_ssize_t end;
        PyObject *res;
        Py_UNICODE *p;
        Py_UNICODE *startp;
        Py_UNICODE *outp;
        int ressize;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
        startp = PyUnicode_AS_UNICODE(object);
        for (p = startp+start, ressize = 0; p < startp+end; ++p) {
#ifdef Py_UNICODE_WIDE
            if (*p >= 0x00010000)
                ressize += 1+1+8;
            else
#endif
            if (*p >= 0x100) {
                ressize += 1+1+4;
            }
            else
                ressize += 1+1+2;
        }
        res = PyUnicode_FromUnicode(NULL, ressize);
        if (res==NULL)
            return NULL;
        for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
            p < startp+end; ++p) {
            Py_UNICODE c = *p;
            *outp++ = '\\';
#ifdef Py_UNICODE_WIDE
            if (c >= 0x00010000) {
                *outp++ = 'U';
                *outp++ = hexdigits[(c>>28)&0xf];
                *outp++ = hexdigits[(c>>24)&0xf];
                *outp++ = hexdigits[(c>>20)&0xf];
                *outp++ = hexdigits[(c>>16)&0xf];
                *outp++ = hexdigits[(c>>12)&0xf];
                *outp++ = hexdigits[(c>>8)&0xf];
            }
            else
#endif
            if (c >= 0x100) {
                *outp++ = 'u';
                *outp++ = hexdigits[(c>>12)&0xf];
                *outp++ = hexdigits[(c>>8)&0xf];
            }
Пример #2
0
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
{
    Py_ssize_t start, end, i, len;

    if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
        PyObject *res;
        int kind;
        void *data;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        len = end - start;
        res = PyUnicode_New(len, '?');
        if (res == NULL)
            return NULL;
        kind = PyUnicode_KIND(res);
        data = PyUnicode_DATA(res);
        for (i = 0; i < len; ++i)
            PyUnicode_WRITE(kind, data, i, '?');
        assert(_PyUnicode_CheckConsistency(res, 1));
        return Py_BuildValue("(Nn)", res, end);
    }
    else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
        return Py_BuildValue("(Cn)",
                             (int)Py_UNICODE_REPLACEMENT_CHARACTER,
                             end);
    }
    else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
        PyObject *res;
        int kind;
        void *data;
        if (PyUnicodeTranslateError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeTranslateError_GetEnd(exc, &end))
            return NULL;
        len = end - start;
        res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER);
        if (res == NULL)
            return NULL;
        kind = PyUnicode_KIND(res);
        data = PyUnicode_DATA(res);
        for (i=0; i < len; i++)
            PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
        assert(_PyUnicode_CheckConsistency(res, 1));
        return Py_BuildValue("(Nn)", res, end);
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}
Пример #3
0
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
{
    PyObject *restuple;
    Py_ssize_t start;
    Py_ssize_t end;
    Py_ssize_t i;

    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        PyObject *res;
        Py_UNICODE *p;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        res = PyUnicode_FromUnicode(NULL, end-start);
        if (res == NULL)
            return NULL;
        for (p = PyUnicode_AS_UNICODE(res), i = start;
            i<end; ++p, ++i)
            *p = '?';
        restuple = Py_BuildValue("(On)", res, end);
        Py_DECREF(res);
        return restuple;
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
        Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
        return Py_BuildValue("(u#n)", &res, (Py_ssize_t)1, end);
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
        PyObject *res;
        Py_UNICODE *p;
        if (PyUnicodeTranslateError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeTranslateError_GetEnd(exc, &end))
            return NULL;
        res = PyUnicode_FromUnicode(NULL, end-start);
        if (res == NULL)
            return NULL;
        for (p = PyUnicode_AS_UNICODE(res), i = start;
            i<end; ++p, ++i)
            *p = Py_UNICODE_REPLACEMENT_CHARACTER;
        restuple = Py_BuildValue("(On)", res, end);
        Py_DECREF(res);
        return restuple;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}
Пример #4
0
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        PyObject *restuple;
        PyObject *object;
        Py_ssize_t i;
        Py_ssize_t start;
        Py_ssize_t end;
        PyObject *res;
        unsigned char *outp;
        int ressize;
        Py_UCS4 c;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
        for (i = start, ressize = 0; i < end; ++i) {
            /* object is guaranteed to be "ready" */
            c = PyUnicode_READ_CHAR(object, i);
            if (c >= 0x10000) {
                ressize += 1+1+8;
            }
            else if (c >= 0x100) {
                ressize += 1+1+4;
            }
            else
                ressize += 1+1+2;
        }
        res = PyUnicode_New(ressize, 127);
        if (res==NULL)
            return NULL;
        for (i = start, outp = PyUnicode_1BYTE_DATA(res);
            i < end; ++i) {
            c = PyUnicode_READ_CHAR(object, i);
            *outp++ = '\\';
            if (c >= 0x00010000) {
                *outp++ = 'U';
                *outp++ = Py_hexdigits[(c>>28)&0xf];
                *outp++ = Py_hexdigits[(c>>24)&0xf];
                *outp++ = Py_hexdigits[(c>>20)&0xf];
                *outp++ = Py_hexdigits[(c>>16)&0xf];
                *outp++ = Py_hexdigits[(c>>12)&0xf];
                *outp++ = Py_hexdigits[(c>>8)&0xf];
            }
            else if (c >= 0x100) {
                *outp++ = 'u';
                *outp++ = Py_hexdigits[(c>>12)&0xf];
                *outp++ = Py_hexdigits[(c>>8)&0xf];
            }
Пример #5
0
PyObject* PyCodec_IgnoreErrors(PyObject* exc) noexcept {
    Py_ssize_t end;
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
    } else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
    } else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
        if (PyUnicodeTranslateError_GetEnd(exc, &end))
            return NULL;
    } else {
        wrong_exception_type(exc);
        return NULL;
    }
    /* ouch: passing NULL, 0, pos gives None instead of u'' */
    return Py_BuildValue("(u#n)", &end, 0, end);
}
Пример #6
0
PyObject *PyCodec_IgnoreErrors(PyObject *exc)
{
    Py_ssize_t end;
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
        if (PyUnicodeTranslateError_GetEnd(exc, &end))
            return NULL;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
    return Py_BuildValue("(Nn)", PyUnicode_New(0, 0), end);
}
Пример #7
0
PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
{
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        PyObject *restuple;
        PyObject *object;
        Py_ssize_t start;
        Py_ssize_t end;
        PyObject *res;
        Py_UNICODE *p;
        Py_UNICODE *startp;
        Py_UNICODE *e;
        Py_UNICODE *outp;
        int ressize;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
        startp = PyUnicode_AS_UNICODE(object);
        e = startp + end;
        for (p = startp+start, ressize = 0; p < e;) {
            Py_UCS4 ch = *p++;
#ifndef Py_UNICODE_WIDE
            if ((0xD800 <= ch && ch <= 0xDBFF) &&
                (p < e) &&
                (0xDC00 <= *p && *p <= 0xDFFF)) {
                ch = ((((ch & 0x03FF) << 10) |
                       ((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
            }
#endif
            if (ch < 10)
                ressize += 2+1+1;
            else if (ch < 100)
                ressize += 2+2+1;
            else if (ch < 1000)
                ressize += 2+3+1;
            else if (ch < 10000)
                ressize += 2+4+1;
            else if (ch < 100000)
                ressize += 2+5+1;
            else if (ch < 1000000)
                ressize += 2+6+1;
            else
                ressize += 2+7+1;
        }
        /* allocate replacement */
        res = PyUnicode_FromUnicode(NULL, ressize);
        if (res == NULL) {
            Py_DECREF(object);
            return NULL;
        }
        /* generate replacement */
        for (p = startp+start, outp = PyUnicode_AS_UNICODE(res); p < e;) {
            int digits;
            int base;
            Py_UCS4 ch = *p++;
#ifndef Py_UNICODE_WIDE
            if ((0xD800 <= ch && ch <= 0xDBFF) &&
                (p < startp+end) &&
                (0xDC00 <= *p && *p <= 0xDFFF)) {
                ch = ((((ch & 0x03FF) << 10) |
                       ((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
            }
#endif
            *outp++ = '&';
            *outp++ = '#';
            if (ch < 10) {
                digits = 1;
                base = 1;
            }
            else if (ch < 100) {
                digits = 2;
                base = 10;
            }
            else if (ch < 1000) {
                digits = 3;
                base = 100;
            }
            else if (ch < 10000) {
                digits = 4;
                base = 1000;
            }
            else if (ch < 100000) {
                digits = 5;
                base = 10000;
            }
            else if (ch < 1000000) {
                digits = 6;
                base = 100000;
            }
            else {
                digits = 7;
                base = 1000000;
            }
            while (digits-->0) {
                *outp++ = '0' + ch/base;
                ch %= base;
                base /= 10;
            }
            *outp++ = ';';
        }
        restuple = Py_BuildValue("(On)", res, end);
        Py_DECREF(res);
        Py_DECREF(object);
        return restuple;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}
Пример #8
0
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
    PyObject *object;
    Py_ssize_t i;
    Py_ssize_t start;
    Py_ssize_t end;
    PyObject *res;
    unsigned char *outp;
    int ressize;
    Py_UCS4 c;

    if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
        const unsigned char *p;
        if (PyUnicodeDecodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeDecodeError_GetObject(exc)))
            return NULL;
        p = (const unsigned char*)PyBytes_AS_STRING(object);
        res = PyUnicode_New(4 * (end - start), 127);
        if (res == NULL) {
            Py_DECREF(object);
            return NULL;
        }
        outp = PyUnicode_1BYTE_DATA(res);
        for (i = start; i < end; i++, outp += 4) {
            unsigned char c = p[i];
            outp[0] = '\\';
            outp[1] = 'x';
            outp[2] = Py_hexdigits[(c>>4)&0xf];
            outp[3] = Py_hexdigits[c&0xf];
        }

        assert(_PyUnicode_CheckConsistency(res, 1));
        Py_DECREF(object);
        return Py_BuildValue("(Nn)", res, end);
    }
    if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
    }
    else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
        if (PyUnicodeTranslateError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeTranslateError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeTranslateError_GetObject(exc)))
            return NULL;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }

    if (end - start > PY_SSIZE_T_MAX / (1+1+8))
        end = start + PY_SSIZE_T_MAX / (1+1+8);
    for (i = start, ressize = 0; i < end; ++i) {
        /* object is guaranteed to be "ready" */
        c = PyUnicode_READ_CHAR(object, i);
        if (c >= 0x10000) {
            ressize += 1+1+8;
        }
        else if (c >= 0x100) {
            ressize += 1+1+4;
        }
        else
            ressize += 1+1+2;
    }
    res = PyUnicode_New(ressize, 127);
    if (res == NULL) {
        Py_DECREF(object);
        return NULL;
    }
    outp = PyUnicode_1BYTE_DATA(res);
    for (i = start; i < end; ++i) {
        c = PyUnicode_READ_CHAR(object, i);
        *outp++ = '\\';
        if (c >= 0x00010000) {
            *outp++ = 'U';
            *outp++ = Py_hexdigits[(c>>28)&0xf];
            *outp++ = Py_hexdigits[(c>>24)&0xf];
            *outp++ = Py_hexdigits[(c>>20)&0xf];
            *outp++ = Py_hexdigits[(c>>16)&0xf];
            *outp++ = Py_hexdigits[(c>>12)&0xf];
            *outp++ = Py_hexdigits[(c>>8)&0xf];
        }
        else if (c >= 0x100) {
Пример #9
0
PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
{
    if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
        PyObject *restuple;
        PyObject *object;
        Py_ssize_t i;
        Py_ssize_t start;
        Py_ssize_t end;
        PyObject *res;
        unsigned char *outp;
        Py_ssize_t ressize;
        Py_UCS4 ch;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
        if (end - start > PY_SSIZE_T_MAX / (2+7+1))
            end = start + PY_SSIZE_T_MAX / (2+7+1);
        for (i = start, ressize = 0; i < end; ++i) {
            /* object is guaranteed to be "ready" */
            ch = PyUnicode_READ_CHAR(object, i);
            if (ch<10)
                ressize += 2+1+1;
            else if (ch<100)
                ressize += 2+2+1;
            else if (ch<1000)
                ressize += 2+3+1;
            else if (ch<10000)
                ressize += 2+4+1;
            else if (ch<100000)
                ressize += 2+5+1;
            else if (ch<1000000)
                ressize += 2+6+1;
            else
                ressize += 2+7+1;
        }
        /* allocate replacement */
        res = PyUnicode_New(ressize, 127);
        if (res == NULL) {
            Py_DECREF(object);
            return NULL;
        }
        outp = PyUnicode_1BYTE_DATA(res);
        /* generate replacement */
        for (i = start; i < end; ++i) {
            int digits;
            int base;
            ch = PyUnicode_READ_CHAR(object, i);
            *outp++ = '&';
            *outp++ = '#';
            if (ch<10) {
                digits = 1;
                base = 1;
            }
            else if (ch<100) {
                digits = 2;
                base = 10;
            }
            else if (ch<1000) {
                digits = 3;
                base = 100;
            }
            else if (ch<10000) {
                digits = 4;
                base = 1000;
            }
            else if (ch<100000) {
                digits = 5;
                base = 10000;
            }
            else if (ch<1000000) {
                digits = 6;
                base = 100000;
            }
            else {
                digits = 7;
                base = 1000000;
            }
            while (digits-->0) {
                *outp++ = '0' + ch/base;
                ch %= base;
                base /= 10;
            }
            *outp++ = ';';
        }
        assert(_PyUnicode_CheckConsistency(res, 1));
        restuple = Py_BuildValue("(Nn)", res, end);
        Py_DECREF(object);
        return restuple;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}
Пример #10
0
PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
{
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
	PyObject *restuple;
	PyObject *object;
	Py_ssize_t start;
	Py_ssize_t end;
	PyObject *res;
	Py_UNICODE *p;
	Py_UNICODE *startp;
	Py_UNICODE *outp;
	int ressize;
	if (PyUnicodeEncodeError_GetStart(exc, &start))
	    return NULL;
	if (PyUnicodeEncodeError_GetEnd(exc, &end))
	    return NULL;
	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
	    return NULL;
	startp = PyUnicode_AS_UNICODE(object);
	for (p = startp+start, ressize = 0; p < startp+end; ++p) {
	    if (*p<10)
		ressize += 2+1+1;
	    else if (*p<100)
		ressize += 2+2+1;
	    else if (*p<1000)
		ressize += 2+3+1;
	    else if (*p<10000)
		ressize += 2+4+1;
#ifndef Py_UNICODE_WIDE
	    else
		ressize += 2+5+1;
#else
	    else if (*p<100000)
		ressize += 2+5+1;
	    else if (*p<1000000)
		ressize += 2+6+1;
	    else
		ressize += 2+7+1;
#endif
	}
	/* allocate replacement */
	res = PyUnicode_FromUnicode(NULL, ressize);
	if (res == NULL) {
	    Py_DECREF(object);
	    return NULL;
	}
	/* generate replacement */
	for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
	    p < startp+end; ++p) {
	    Py_UNICODE c = *p;
	    int digits;
	    int base;
	    *outp++ = '&';
	    *outp++ = '#';
	    if (*p<10) {
		digits = 1;
		base = 1;
	    }
	    else if (*p<100) {
		digits = 2;
		base = 10;
	    }
	    else if (*p<1000) {
		digits = 3;
		base = 100;
	    }
	    else if (*p<10000) {
		digits = 4;
		base = 1000;
	    }
#ifndef Py_UNICODE_WIDE
	    else {
		digits = 5;
		base = 10000;
	    }
#else
	    else if (*p<100000) {
		digits = 5;
		base = 10000;
	    }
	    else if (*p<1000000) {
		digits = 6;
		base = 100000;
	    }
	    else {
		digits = 7;
		base = 1000000;
	    }
#endif
	    while (digits-->0) {
		*outp++ = '0' + c/base;
		c %= base;
		base /= 10;
	    }
	    *outp++ = ';';
	}