Example #1
0
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
{
    Py_ssize_t start, end, i, len;

    if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
        PyObject *res;
        int kind;
        void *data;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        len = end - start;
        res = PyUnicode_New(len, '?');
        if (res == NULL)
            return NULL;
        kind = PyUnicode_KIND(res);
        data = PyUnicode_DATA(res);
        for (i = 0; i < len; ++i)
            PyUnicode_WRITE(kind, data, i, '?');
        assert(_PyUnicode_CheckConsistency(res, 1));
        return Py_BuildValue("(Nn)", res, end);
    }
    else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
        return Py_BuildValue("(Cn)",
                             (int)Py_UNICODE_REPLACEMENT_CHARACTER,
                             end);
    }
    else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
        PyObject *res;
        int kind;
        void *data;
        if (PyUnicodeTranslateError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeTranslateError_GetEnd(exc, &end))
            return NULL;
        len = end - start;
        res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER);
        if (res == NULL)
            return NULL;
        kind = PyUnicode_KIND(res);
        data = PyUnicode_DATA(res);
        for (i=0; i < len; i++)
            PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
        assert(_PyUnicode_CheckConsistency(res, 1));
        return Py_BuildValue("(Nn)", res, end);
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}
Example #2
0
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        PyObject *restuple;
        PyObject *object;
        Py_ssize_t i;
        Py_ssize_t start;
        Py_ssize_t end;
        PyObject *res;
        unsigned char *outp;
        int ressize;
        Py_UCS4 c;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
        for (i = start, ressize = 0; i < end; ++i) {
            /* object is guaranteed to be "ready" */
            c = PyUnicode_READ_CHAR(object, i);
            if (c >= 0x10000) {
                ressize += 1+1+8;
            }
            else if (c >= 0x100) {
                ressize += 1+1+4;
            }
            else
                ressize += 1+1+2;
        }
        res = PyUnicode_New(ressize, 127);
        if (res==NULL)
            return NULL;
        for (i = start, outp = PyUnicode_1BYTE_DATA(res);
            i < end; ++i) {
            c = PyUnicode_READ_CHAR(object, i);
            *outp++ = '\\';
            if (c >= 0x00010000) {
                *outp++ = 'U';
                *outp++ = Py_hexdigits[(c>>28)&0xf];
                *outp++ = Py_hexdigits[(c>>24)&0xf];
                *outp++ = Py_hexdigits[(c>>20)&0xf];
                *outp++ = Py_hexdigits[(c>>16)&0xf];
                *outp++ = Py_hexdigits[(c>>12)&0xf];
                *outp++ = Py_hexdigits[(c>>8)&0xf];
            }
            else if (c >= 0x100) {
                *outp++ = 'u';
                *outp++ = Py_hexdigits[(c>>12)&0xf];
                *outp++ = Py_hexdigits[(c>>8)&0xf];
            }
Example #3
0
/* Find the decimal point character(s?), thousands_separator(s?), and
   grouping description, either for the current locale if type is
   LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
   none if LT_NO_LOCALE. */
static int
get_locale_info(int type, LocaleInfo *locale_info)
{
    switch (type) {
    case LT_CURRENT_LOCALE: {
        struct lconv *locale_data = localeconv();
        locale_info->decimal_point = PyUnicode_DecodeLocale(
                                         locale_data->decimal_point,
                                         NULL);
        if (locale_info->decimal_point == NULL)
            return -1;
        locale_info->thousands_sep = PyUnicode_DecodeLocale(
                                         locale_data->thousands_sep,
                                         NULL);
        if (locale_info->thousands_sep == NULL) {
            Py_DECREF(locale_info->decimal_point);
            return -1;
        }
        locale_info->grouping = locale_data->grouping;
        break;
    }
    case LT_DEFAULT_LOCALE:
        locale_info->decimal_point = PyUnicode_FromOrdinal('.');
        locale_info->thousands_sep = PyUnicode_FromOrdinal(',');
        if (!locale_info->decimal_point || !locale_info->thousands_sep) {
            Py_XDECREF(locale_info->decimal_point);
            Py_XDECREF(locale_info->thousands_sep);
            return -1;
        }
        locale_info->grouping = "\3"; /* Group every 3 characters.  The
                                         (implicit) trailing 0 means repeat
                                         infinitely. */
        break;
    case LT_NO_LOCALE:
        locale_info->decimal_point = PyUnicode_FromOrdinal('.');
        locale_info->thousands_sep = PyUnicode_New(0, 0);
        if (!locale_info->decimal_point || !locale_info->thousands_sep) {
            Py_XDECREF(locale_info->decimal_point);
            Py_XDECREF(locale_info->thousands_sep);
            return -1;
        }
        locale_info->grouping = no_grouping;
        break;
    default:
        assert(0);
    }
    return 0;
}
Example #4
0
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
    PyObject *object;
    Py_ssize_t i;
    Py_ssize_t start;
    Py_ssize_t end;
    PyObject *res;
    unsigned char *outp;
    int ressize;
    Py_UCS4 c;

    if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
        unsigned char *p;
        if (PyUnicodeDecodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeDecodeError_GetObject(exc)))
            return NULL;
        if (!(p = (unsigned char*)PyBytes_AsString(object))) {
            Py_DECREF(object);
            return NULL;
        }
        res = PyUnicode_New(4 * (end - start), 127);
        if (res == NULL) {
            Py_DECREF(object);
            return NULL;
        }
        outp = PyUnicode_1BYTE_DATA(res);
        for (i = start; i < end; i++, outp += 4) {
            unsigned char c = p[i];
            outp[0] = '\\';
            outp[1] = 'x';
            outp[2] = Py_hexdigits[(c>>4)&0xf];
            outp[3] = Py_hexdigits[c&0xf];
        }

        assert(_PyUnicode_CheckConsistency(res, 1));
        Py_DECREF(object);
        return Py_BuildValue("(Nn)", res, end);
    }
Example #5
0
PyObject *PyCodec_IgnoreErrors(PyObject *exc)
{
    Py_ssize_t end;
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
        if (PyUnicodeTranslateError_GetEnd(exc, &end))
            return NULL;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
    return Py_BuildValue("(Nn)", PyUnicode_New(0, 0), end);
}
Example #6
0
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
    PyObject *object;
    Py_ssize_t i;
    Py_ssize_t start;
    Py_ssize_t end;
    PyObject *res;
    unsigned char *outp;
    int ressize;
    Py_UCS4 c;

    if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
        const unsigned char *p;
        if (PyUnicodeDecodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeDecodeError_GetObject(exc)))
            return NULL;
        p = (const unsigned char*)PyBytes_AS_STRING(object);
        res = PyUnicode_New(4 * (end - start), 127);
        if (res == NULL) {
            Py_DECREF(object);
            return NULL;
        }
        outp = PyUnicode_1BYTE_DATA(res);
        for (i = start; i < end; i++, outp += 4) {
            unsigned char c = p[i];
            outp[0] = '\\';
            outp[1] = 'x';
            outp[2] = Py_hexdigits[(c>>4)&0xf];
            outp[3] = Py_hexdigits[c&0xf];
        }

        assert(_PyUnicode_CheckConsistency(res, 1));
        Py_DECREF(object);
        return Py_BuildValue("(Nn)", res, end);
    }
    if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
    }
    else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
        if (PyUnicodeTranslateError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeTranslateError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeTranslateError_GetObject(exc)))
            return NULL;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }

    if (end - start > PY_SSIZE_T_MAX / (1+1+8))
        end = start + PY_SSIZE_T_MAX / (1+1+8);
    for (i = start, ressize = 0; i < end; ++i) {
        /* object is guaranteed to be "ready" */
        c = PyUnicode_READ_CHAR(object, i);
        if (c >= 0x10000) {
            ressize += 1+1+8;
        }
        else if (c >= 0x100) {
            ressize += 1+1+4;
        }
        else
            ressize += 1+1+2;
    }
    res = PyUnicode_New(ressize, 127);
    if (res == NULL) {
        Py_DECREF(object);
        return NULL;
    }
    outp = PyUnicode_1BYTE_DATA(res);
    for (i = start; i < end; ++i) {
        c = PyUnicode_READ_CHAR(object, i);
        *outp++ = '\\';
        if (c >= 0x00010000) {
            *outp++ = 'U';
            *outp++ = Py_hexdigits[(c>>28)&0xf];
            *outp++ = Py_hexdigits[(c>>24)&0xf];
            *outp++ = Py_hexdigits[(c>>20)&0xf];
            *outp++ = Py_hexdigits[(c>>16)&0xf];
            *outp++ = Py_hexdigits[(c>>12)&0xf];
            *outp++ = Py_hexdigits[(c>>8)&0xf];
        }
        else if (c >= 0x100) {
Example #7
0
PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
{
    if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
        PyObject *restuple;
        PyObject *object;
        Py_ssize_t i;
        Py_ssize_t start;
        Py_ssize_t end;
        PyObject *res;
        unsigned char *outp;
        Py_ssize_t ressize;
        Py_UCS4 ch;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
        if (end - start > PY_SSIZE_T_MAX / (2+7+1))
            end = start + PY_SSIZE_T_MAX / (2+7+1);
        for (i = start, ressize = 0; i < end; ++i) {
            /* object is guaranteed to be "ready" */
            ch = PyUnicode_READ_CHAR(object, i);
            if (ch<10)
                ressize += 2+1+1;
            else if (ch<100)
                ressize += 2+2+1;
            else if (ch<1000)
                ressize += 2+3+1;
            else if (ch<10000)
                ressize += 2+4+1;
            else if (ch<100000)
                ressize += 2+5+1;
            else if (ch<1000000)
                ressize += 2+6+1;
            else
                ressize += 2+7+1;
        }
        /* allocate replacement */
        res = PyUnicode_New(ressize, 127);
        if (res == NULL) {
            Py_DECREF(object);
            return NULL;
        }
        outp = PyUnicode_1BYTE_DATA(res);
        /* generate replacement */
        for (i = start; i < end; ++i) {
            int digits;
            int base;
            ch = PyUnicode_READ_CHAR(object, i);
            *outp++ = '&';
            *outp++ = '#';
            if (ch<10) {
                digits = 1;
                base = 1;
            }
            else if (ch<100) {
                digits = 2;
                base = 10;
            }
            else if (ch<1000) {
                digits = 3;
                base = 100;
            }
            else if (ch<10000) {
                digits = 4;
                base = 1000;
            }
            else if (ch<100000) {
                digits = 5;
                base = 10000;
            }
            else if (ch<1000000) {
                digits = 6;
                base = 100000;
            }
            else {
                digits = 7;
                base = 1000000;
            }
            while (digits-->0) {
                *outp++ = '0' + ch/base;
                ch %= base;
                base /= 10;
            }
            *outp++ = ';';
        }
        assert(_PyUnicode_CheckConsistency(res, 1));
        restuple = Py_BuildValue("(Nn)", res, end);
        Py_DECREF(object);
        return restuple;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}
Example #8
0
/* zipimporter.__init__
   Split the "subdirectory" from the Zip archive path, lookup a matching
   entry in sys.path_importer_cache, fetch the file directory from there
   if found, or else read it from the archive. */
static int
zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
{
    PyObject *path, *files, *tmp;
    PyObject *filename = NULL;
    Py_ssize_t len, flen;

    if (!_PyArg_NoKeywords("zipimporter()", kwds))
        return -1;

    if (!PyArg_ParseTuple(args, "O&:zipimporter",
                          PyUnicode_FSDecoder, &path))
        return -1;

    if (PyUnicode_READY(path) == -1)
        return -1;

    len = PyUnicode_GET_LENGTH(path);
    if (len == 0) {
        PyErr_SetString(ZipImportError, "archive path is empty");
        goto error;
    }

#ifdef ALTSEP
    tmp = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
    if (!tmp)
        goto error;
    Py_DECREF(path);
    path = tmp;
#endif

    filename = path;
    Py_INCREF(filename);
    flen = len;
    for (;;) {
        struct stat statbuf;
        int rv;

        rv = _Py_stat(filename, &statbuf);
        if (rv == -2)
            goto error;
        if (rv == 0) {
            /* it exists */
            if (!S_ISREG(statbuf.st_mode))
                /* it's a not file */
                Py_CLEAR(filename);
            break;
        }
        Py_CLEAR(filename);
        /* back up one path element */
        flen = PyUnicode_FindChar(path, SEP, 0, flen, -1);
        if (flen == -1)
            break;
        filename = PyUnicode_Substring(path, 0, flen);
        if (filename == NULL)
            goto error;
    }
    if (filename == NULL) {
        PyErr_SetString(ZipImportError, "not a Zip file");
        goto error;
    }

    if (PyUnicode_READY(filename) < 0)
        goto error;

    files = PyDict_GetItem(zip_directory_cache, filename);
    if (files == NULL) {
        files = read_directory(filename);
        if (files == NULL)
            goto error;
        if (PyDict_SetItem(zip_directory_cache, filename, files) != 0)
            goto error;
    }
    else
        Py_INCREF(files);
    self->files = files;

    /* Transfer reference */
    self->archive = filename;
    filename = NULL;

    /* Check if there is a prefix directory following the filename. */
    if (flen != len) {
        tmp = PyUnicode_Substring(path, flen+1,
                                  PyUnicode_GET_LENGTH(path));
        if (tmp == NULL)
            goto error;
        self->prefix = tmp;
        if (PyUnicode_READ_CHAR(path, len-1) != SEP) {
            /* add trailing SEP */
            tmp = PyUnicode_FromFormat("%U%c", self->prefix, SEP);
            if (tmp == NULL)
                goto error;
            Py_SETREF(self->prefix, tmp);
        }
    }
    else
        self->prefix = PyUnicode_New(0, 0);
    Py_DECREF(path);
    return 0;

error:
    Py_DECREF(path);
    Py_XDECREF(filename);
    return -1;
}
Example #9
0
// Convert a QString to a Python Unicode object.
PyObject *qpycore_PyObject_FromQString(const QString &qstr)
{
    PyObject *obj;

#if defined(PYQT_PEP_393)
    // We have to work out exactly which kind to use.  We assume ASCII while we
    // are checking so that we only go through the string once is the most
    // common case.  Note that we can't use PyUnicode_FromKindAndData() because
    // it doesn't handle surrogates in UCS2 strings.
    int py_len = qstr.length();

    if ((obj = PyUnicode_New(py_len, 0x007f)) == NULL)
        return NULL;

    int kind = PyUnicode_KIND(obj);
    void *data = PyUnicode_DATA(obj);
    const QChar *qch = qstr.data();

    for (int i = 0; i < py_len; ++i)
    {
        ushort uch = qch->unicode();

        if (uch > 0x007f)
        {
            // This is useless.
            Py_DECREF(obj);

            // Work out what kind we really need and what the Python length
            // should be.
            Py_UCS4 maxchar = 0x00ff;

            do
            {
                if (uch > 0x00ff)
                {
                    if (maxchar == 0x00ff)
                        maxchar = 0x00ffff;

                    // See if this is a surrogate pair.  We don't need to
                    // bounds check because Qt puts a null QChar on the end.
                    if (qch->isHighSurrogate() && (qch + 1)->isLowSurrogate())
                    {
                        maxchar = 0x10ffff;
                        --py_len;
                        ++qch;
                    }
                }

                uch = (++qch)->unicode();
            }
            while (!qch->isNull());

            // Create the correctly sized object.
            if ((obj = PyUnicode_New(py_len, maxchar)) == NULL)
                return NULL;

            kind = PyUnicode_KIND(obj);
            data = PyUnicode_DATA(obj);
            qch = qstr.data();

            for (int py_i = 0; py_i < py_len; ++py_i)
            {
                Py_UCS4 py_ch;

                if (qch->isHighSurrogate() && (qch + 1)->isLowSurrogate())
                {
                    py_ch = QChar::surrogateToUcs4(*qch, *(qch + 1));
                    ++qch;
                }
                else
                {
                    py_ch = qch->unicode();
                }

                ++qch;

                PyUnicode_WRITE(kind, data, py_i, py_ch);
            }

            break;
        }

        ++qch;

        PyUnicode_WRITE(kind, data, i, uch);
    }
#elif defined(Py_UNICODE_WIDE)
    QVector<uint> ucs4 = qstr.toUcs4();

    if ((obj = PyUnicode_FromUnicode(NULL, ucs4.size())) == NULL)
        return NULL;

    memcpy(PyUnicode_AS_UNICODE(obj), ucs4.constData(),
            ucs4.size() * sizeof (Py_UNICODE));
#else
    if ((obj = PyUnicode_FromUnicode(NULL, qstr.length())) == NULL)
        return NULL;

    memcpy(PyUnicode_AS_UNICODE(obj), qstr.utf16(),
            qstr.length() * sizeof (Py_UNICODE));
#endif

    return obj;
}
Example #10
0
static int
zipimport_zipimporter___init___impl(ZipImporter *self, PyObject *path)
/*[clinic end generated code: output=141558fefdb46dc8 input=92b9ebeed1f6a704]*/
{
    PyObject *files, *tmp;
    PyObject *filename = NULL;
    Py_ssize_t len, flen;

    if (PyUnicode_READY(path) == -1)
        return -1;

    len = PyUnicode_GET_LENGTH(path);
    if (len == 0) {
        PyErr_SetString(ZipImportError, "archive path is empty");
        goto error;
    }

#ifdef ALTSEP
    tmp = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
    if (!tmp)
        goto error;
    Py_DECREF(path);
    path = tmp;
#endif

    filename = path;
    Py_INCREF(filename);
    flen = len;
    for (;;) {
        struct stat statbuf;
        int rv;

        rv = _Py_stat(filename, &statbuf);
        if (rv == -2)
            goto error;
        if (rv == 0) {
            /* it exists */
            if (!S_ISREG(statbuf.st_mode))
                /* it's a not file */
                Py_CLEAR(filename);
            break;
        }
        Py_CLEAR(filename);
        /* back up one path element */
        flen = PyUnicode_FindChar(path, SEP, 0, flen, -1);
        if (flen == -1)
            break;
        filename = PyUnicode_Substring(path, 0, flen);
        if (filename == NULL)
            goto error;
    }
    if (filename == NULL) {
        PyErr_SetString(ZipImportError, "not a Zip file");
        goto error;
    }

    if (PyUnicode_READY(filename) < 0)
        goto error;

    files = PyDict_GetItem(zip_directory_cache, filename);
    if (files == NULL) {
        files = read_directory(filename);
        if (files == NULL)
            goto error;
        if (PyDict_SetItem(zip_directory_cache, filename, files) != 0)
            goto error;
    }
    else
        Py_INCREF(files);
    Py_XSETREF(self->files, files);

    /* Transfer reference */
    Py_XSETREF(self->archive, filename);
    filename = NULL;

    /* Check if there is a prefix directory following the filename. */
    if (flen != len) {
        tmp = PyUnicode_Substring(path, flen+1,
                                  PyUnicode_GET_LENGTH(path));
        if (tmp == NULL)
            goto error;
        Py_XSETREF(self->prefix, tmp);
        if (PyUnicode_READ_CHAR(path, len-1) != SEP) {
            /* add trailing SEP */
            tmp = PyUnicode_FromFormat("%U%c", self->prefix, SEP);
            if (tmp == NULL)
                goto error;
            Py_SETREF(self->prefix, tmp);
        }
    }
    else {
        Py_XSETREF(self->prefix, PyUnicode_New(0, 0));
    }
    Py_DECREF(path);
    return 0;

error:
    Py_DECREF(path);
    Py_XDECREF(filename);
    return -1;
}
/* define unicode version of xmlescape */
static PyObject *xmlescape_str(PyObject *str, int doquot, int doapos)
{
	Py_ssize_t oldsize;
	void *olddata;
	int maxchar = 127;
	Py_ssize_t i;
	Py_ssize_t newsize = 0;
	void *newdata;

	int kind = PyUnicode_KIND(str);

	oldsize = PyUnicode_GET_LENGTH(str);
	olddata = PyUnicode_DATA(str);
	for (i = 0; i < oldsize; ++i)
	{
		Py_UCS4 ch = PyUnicode_READ(kind, olddata, i);
		if (ch == ((Py_UCS4)'<'))
			newsize += 4; /* &lt; */
		else if (ch == (Py_UCS4)'>') /* Note that we always replace '>' with its entity, not just in case it is part of ']]>' */
			newsize += 4; /* &gt; */
		else if (ch == (Py_UCS4)'&')
			newsize += 5; /* &amp; */
		else if ((ch == (Py_UCS4)'"') && doquot)
			newsize += 6; /* &quot; */
		else if ((ch == (Py_UCS4)'\'') && doapos)
			newsize += 5; /* &#39; */
		else if (ch <= 0x8)
			newsize += 4;
		else if ((ch >= 0xB) && (ch <= 0x1F) && (ch != 0xD))
			newsize += 5;
		else if ((ch >= 0x7F) && (ch <= 0x9F) && (ch != 0x85))
			newsize += 6;
		else
		{
			newsize++;
			if (ch > maxchar)
				maxchar = ch;
		}
	}
	if (oldsize==newsize)
	{
		/* nothing to replace => return original */
		Py_INCREF(str);
		return str;
	}
	else
	{
		int index = 0;
		PyObject *result = PyUnicode_New(newsize, maxchar);
		newdata = PyUnicode_DATA(result);
		if (result == NULL)
			return NULL;
		for (i = 0; i < oldsize; ++i)
		{
			Py_UCS4 ch = PyUnicode_READ(kind, olddata, i);
			if (ch == (Py_UCS4)'<')
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, 'l');
				PyUnicode_WRITE(kind, newdata, index++, 't');
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if (ch == (Py_UCS4)'>')
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, 'g');
				PyUnicode_WRITE(kind, newdata, index++, 't');
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if (ch == (Py_UCS4)'&')
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, 'a');
				PyUnicode_WRITE(kind, newdata, index++, 'm');
				PyUnicode_WRITE(kind, newdata, index++, 'p');
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if ((ch == (Py_UCS4)'"') && doquot)
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, 'q');
				PyUnicode_WRITE(kind, newdata, index++, 'u');
				PyUnicode_WRITE(kind, newdata, index++, 'o');
				PyUnicode_WRITE(kind, newdata, index++, 't');
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if ((ch == (Py_UCS4)'\'') && doapos)
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, '#');
				PyUnicode_WRITE(kind, newdata, index++, '3');
				PyUnicode_WRITE(kind, newdata, index++, '9');
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if (ch <= 0x8)
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, '#');
				PyUnicode_WRITE(kind, newdata, index++, '0'+ch);
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if ((ch >= 0xB) && (ch <= 0x1F) && (ch != 0xD))
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, '#');
				PyUnicode_WRITE(kind, newdata, index++, '0'+ch/10);
				PyUnicode_WRITE(kind, newdata, index++, '0'+ch%10);
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if ((ch >= 0x7F) && (ch <= 0x9F) && (ch != 0x85))
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, '#');
				PyUnicode_WRITE(kind, newdata, index++, '0'+ch/100);
				PyUnicode_WRITE(kind, newdata, index++, '0'+(ch/10)%10);
				PyUnicode_WRITE(kind, newdata, index++, '0'+ch%10);
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else
				PyUnicode_WRITE(kind, newdata, index++, ch);
		}
		return result;
	}
}