PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) { PyObject *object; Py_ssize_t i; Py_ssize_t start; Py_ssize_t end; PyObject *res; unsigned char *outp; int ressize; Py_UCS4 c; if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) { const unsigned char *p; if (PyUnicodeDecodeError_GetStart(exc, &start)) return NULL; if (PyUnicodeDecodeError_GetEnd(exc, &end)) return NULL; if (!(object = PyUnicodeDecodeError_GetObject(exc))) return NULL; p = (const unsigned char*)PyBytes_AS_STRING(object); res = PyUnicode_New(4 * (end - start), 127); if (res == NULL) { Py_DECREF(object); return NULL; } outp = PyUnicode_1BYTE_DATA(res); for (i = start; i < end; i++, outp += 4) { unsigned char c = p[i]; outp[0] = '\\'; outp[1] = 'x'; outp[2] = Py_hexdigits[(c>>4)&0xf]; outp[3] = Py_hexdigits[c&0xf]; } assert(_PyUnicode_CheckConsistency(res, 1)); Py_DECREF(object); return Py_BuildValue("(Nn)", res, end); } if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { if (PyUnicodeEncodeError_GetStart(exc, &start)) return NULL; if (PyUnicodeEncodeError_GetEnd(exc, &end)) return NULL; if (!(object = PyUnicodeEncodeError_GetObject(exc))) return NULL; } else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) { if (PyUnicodeTranslateError_GetStart(exc, &start)) return NULL; if (PyUnicodeTranslateError_GetEnd(exc, &end)) return NULL; if (!(object = PyUnicodeTranslateError_GetObject(exc))) return NULL; } else { wrong_exception_type(exc); return NULL; } if (end - start > PY_SSIZE_T_MAX / (1+1+8)) end = start + PY_SSIZE_T_MAX / (1+1+8); for (i = start, ressize = 0; i < end; ++i) { /* object is guaranteed to be "ready" */ c = PyUnicode_READ_CHAR(object, i); if (c >= 0x10000) { ressize += 1+1+8; } else if (c >= 0x100) { ressize += 1+1+4; } else ressize += 1+1+2; } res = PyUnicode_New(ressize, 127); if (res == NULL) { Py_DECREF(object); return NULL; } outp = PyUnicode_1BYTE_DATA(res); for (i = start; i < end; ++i) { c = 
PyUnicode_READ_CHAR(object, i); *outp++ = '\\'; if (c >= 0x00010000) { *outp++ = 'U'; *outp++ = Py_hexdigits[(c>>28)&0xf]; *outp++ = Py_hexdigits[(c>>24)&0xf]; *outp++ = Py_hexdigits[(c>>20)&0xf]; *outp++ = Py_hexdigits[(c>>16)&0xf]; *outp++ = Py_hexdigits[(c>>12)&0xf]; *outp++ = Py_hexdigits[(c>>8)&0xf]; } else if (c >= 0x100) {
/*
 * "xmlcharrefreplace" codec error handler.
 *
 * exc must be a UnicodeEncodeError instance.  Every code point in the
 * exception's [start, end) range is rendered as an XML numeric character
 * reference in decimal ("&#" digits ";").
 *
 * Returns a new 2-tuple (replacement, end) where replacement is an ASCII-only
 * str and end is the index passed back to the caller as the resume position.
 * Returns NULL on failure: wrong exception type (reported through
 * wrong_exception_type()), a failing PyUnicodeEncodeError_* accessor, or an
 * allocation failure.
 *
 * If the range is so long that the replacement length could not be
 * represented in a Py_ssize_t, only a prefix of the range is replaced and the
 * returned end index is reduced to match.
 */
PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
{
    PyObject *restuple;
    PyObject *object;
    Py_ssize_t i;
    Py_ssize_t start;
    Py_ssize_t end;
    PyObject *res;
    unsigned char *outp;
    Py_ssize_t ressize;  /* Py_ssize_t, not int: it sums up to 10 chars per input char */
    Py_UCS4 ch;

    /* Exact type check, as in PyCodec_BackslashReplaceErrors.  Unlike
       PyObject_IsInstance() it cannot fail with -1 (which would be taken as
       "true") and it is not fooled by an overridden __class__. */
    if (!PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
        wrong_exception_type(exc);
        return NULL;
    }

    if (PyUnicodeEncodeError_GetStart(exc, &start))
        return NULL;
    if (PyUnicodeEncodeError_GetEnd(exc, &end))
        return NULL;
    if (!(object = PyUnicodeEncodeError_GetObject(exc)))
        return NULL;

    /* The longest reference is "&#" + 7 digits + ";" = 2+7+1 characters.
       Clamp the range so that the size computation below cannot overflow. */
    if (end - start > PY_SSIZE_T_MAX / (2+7+1))
        end = start + PY_SSIZE_T_MAX / (2+7+1);

    /* first pass: compute the exact length of the replacement */
    for (i = start, ressize = 0; i < end; ++i) {
        /* object is guaranteed to be "ready" */
        ch = PyUnicode_READ_CHAR(object, i);
        if (ch<10)
            ressize += 2+1+1;
        else if (ch<100)
            ressize += 2+2+1;
        else if (ch<1000)
            ressize += 2+3+1;
        else if (ch<10000)
            ressize += 2+4+1;
        else if (ch<100000)
            ressize += 2+5+1;
        else if (ch<1000000)
            ressize += 2+6+1;
        else
            ressize += 2+7+1;
    }

    /* allocate replacement (maxchar 127: the output is pure ASCII) */
    res = PyUnicode_New(ressize, 127);
    if (res == NULL) {
        Py_DECREF(object);
        return NULL;
    }
    outp = PyUnicode_1BYTE_DATA(res);

    /* second pass: generate replacement; the digit counts chosen here must
       match the sizes added in the first pass */
    for (i = start; i < end; ++i) {
        int digits;
        int base;
        ch = PyUnicode_READ_CHAR(object, i);
        *outp++ = '&';
        *outp++ = '#';
        if (ch<10) {
            digits = 1;
            base = 1;
        }
        else if (ch<100) {
            digits = 2;
            base = 10;
        }
        else if (ch<1000) {
            digits = 3;
            base = 100;
        }
        else if (ch<10000) {
            digits = 4;
            base = 1000;
        }
        else if (ch<100000) {
            digits = 5;
            base = 10000;
        }
        else if (ch<1000000) {
            digits = 6;
            base = 100000;
        }
        else {
            digits = 7;
            base = 1000000;
        }
        /* emit the decimal digits, most significant first */
        while (digits-->0) {
            *outp++ = '0' + ch/base;
            ch %= base;
            base /= 10;
        }
        *outp++ = ';';
    }
    assert(_PyUnicode_CheckConsistency(res, 1));

    /* "N" hands our reference to res over to the tuple */
    restuple = Py_BuildValue("(Nn)", res, end);
    Py_DECREF(object);
    return restuple;
}
PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc) { if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { PyObject *restuple; PyObject *object; Py_ssize_t start; Py_ssize_t end; PyObject *res; Py_UNICODE *p; Py_UNICODE *startp; Py_UNICODE *outp; int ressize; if (PyUnicodeEncodeError_GetStart(exc, &start)) return NULL; if (PyUnicodeEncodeError_GetEnd(exc, &end)) return NULL; if (!(object = PyUnicodeEncodeError_GetObject(exc))) return NULL; startp = PyUnicode_AS_UNICODE(object); for (p = startp+start, ressize = 0; p < startp+end; ++p) { if (*p<10) ressize += 2+1+1; else if (*p<100) ressize += 2+2+1; else if (*p<1000) ressize += 2+3+1; else if (*p<10000) ressize += 2+4+1; #ifndef Py_UNICODE_WIDE else ressize += 2+5+1; #else else if (*p<100000) ressize += 2+5+1; else if (*p<1000000) ressize += 2+6+1; else ressize += 2+7+1; #endif } /* allocate replacement */ res = PyUnicode_FromUnicode(NULL, ressize); if (res == NULL) { Py_DECREF(object); return NULL; } /* generate replacement */ for (p = startp+start, outp = PyUnicode_AS_UNICODE(res); p < startp+end; ++p) { Py_UNICODE c = *p; int digits; int base; *outp++ = '&'; *outp++ = '#'; if (*p<10) { digits = 1; base = 1; } else if (*p<100) { digits = 2; base = 10; } else if (*p<1000) { digits = 3; base = 100; } else if (*p<10000) { digits = 4; base = 1000; } #ifndef Py_UNICODE_WIDE else { digits = 5; base = 10000; } #else else if (*p<100000) { digits = 5; base = 10000; } else if (*p<1000000) { digits = 6; base = 100000; } else { digits = 7; base = 1000000; } #endif while (digits-->0) { *outp++ = '0' + c/base; c %= base; base /= 10; } *outp++ = ';'; }