示例#1
0
文件: codecs.c 项目: 3lnc/cpython
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
{
    Py_ssize_t start, end, i, len;

    if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
        PyObject *res;
        int kind;
        void *data;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        len = end - start;
        res = PyUnicode_New(len, '?');
        if (res == NULL)
            return NULL;
        kind = PyUnicode_KIND(res);
        data = PyUnicode_DATA(res);
        for (i = 0; i < len; ++i)
            PyUnicode_WRITE(kind, data, i, '?');
        assert(_PyUnicode_CheckConsistency(res, 1));
        return Py_BuildValue("(Nn)", res, end);
    }
    else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
        return Py_BuildValue("(Cn)",
                             (int)Py_UNICODE_REPLACEMENT_CHARACTER,
                             end);
    }
    else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
        PyObject *res;
        int kind;
        void *data;
        if (PyUnicodeTranslateError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeTranslateError_GetEnd(exc, &end))
            return NULL;
        len = end - start;
        res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER);
        if (res == NULL)
            return NULL;
        kind = PyUnicode_KIND(res);
        data = PyUnicode_DATA(res);
        for (i=0; i < len; i++)
            PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
        assert(_PyUnicode_CheckConsistency(res, 1));
        return Py_BuildValue("(Nn)", res, end);
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}
示例#2
0
/* Fill in the digit parts of a numbers's string representation,
   as determined in calc_number_widths().
   Return -1 on error, or 0 on success. */
static int
fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
            PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
            PyObject *prefix, Py_ssize_t p_start,
            Py_UCS4 fill_char,
            LocaleInfo *locale, int toupper)
{
    /* Used to keep track of digits, decimal, and remainder. */
    Py_ssize_t d_pos = d_start;
    const unsigned int kind = writer->kind;
    const void *data = writer->data;
    Py_ssize_t r;

    if (spec->n_lpadding) {
        _PyUnicode_FastFill(writer->buffer,
                            writer->pos, spec->n_lpadding, fill_char);
        writer->pos += spec->n_lpadding;
    }
    if (spec->n_sign == 1) {
        PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
        writer->pos++;
    }
    if (spec->n_prefix) {
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
                                      prefix, p_start,
                                      spec->n_prefix);
        if (toupper) {
            Py_ssize_t t;
            for (t = 0; t < spec->n_prefix; t++) {
                Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
                c = Py_TOUPPER(c);
                assert (c <= 127);
                PyUnicode_WRITE(kind, data, writer->pos + t, c);
            }
        }
        writer->pos += spec->n_prefix;
    }
    if (spec->n_spadding) {
        _PyUnicode_FastFill(writer->buffer,
                            writer->pos, spec->n_spadding, fill_char);
        writer->pos += spec->n_spadding;
    }

    /* Only for type 'c' special case, it has no digits. */
    if (spec->n_digits != 0) {
        /* Fill the digits with InsertThousandsGrouping. */
        char *pdigits;
        if (PyUnicode_READY(digits))
            return -1;
        pdigits = PyUnicode_DATA(digits);
        if (PyUnicode_KIND(digits) < kind) {
            pdigits = _PyUnicode_AsKind(digits, kind);
            if (pdigits == NULL)
                return -1;
        }
        r = _PyUnicode_InsertThousandsGrouping(
                writer->buffer, writer->pos,
                spec->n_grouped_digits,
                pdigits + kind * d_pos,
                spec->n_digits, spec->n_min_width,
                locale->grouping, locale->thousands_sep, NULL);
        if (r == -1)
            return -1;
        assert(r == spec->n_grouped_digits);
        if (PyUnicode_KIND(digits) < kind)
            PyMem_Free(pdigits);
        d_pos += spec->n_digits;
    }
    if (toupper) {
        Py_ssize_t t;
        for (t = 0; t < spec->n_grouped_digits; t++) {
            Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
            c = Py_TOUPPER(c);
            if (c > 127) {
                PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
                return -1;
            }
            PyUnicode_WRITE(kind, data, writer->pos + t, c);
        }
    }
    writer->pos += spec->n_grouped_digits;

    if (spec->n_decimal) {
        _PyUnicode_FastCopyCharacters(
            writer->buffer, writer->pos,
            locale->decimal_point, 0, spec->n_decimal);
        writer->pos += spec->n_decimal;
        d_pos += 1;
    }

    if (spec->n_remainder) {
        _PyUnicode_FastCopyCharacters(
            writer->buffer, writer->pos,
            digits, d_pos, spec->n_remainder);
        writer->pos += spec->n_remainder;
        /* d_pos += spec->n_remainder; */
    }

    if (spec->n_rpadding) {
        _PyUnicode_FastFill(writer->buffer,
                            writer->pos, spec->n_rpadding,
                            fill_char);
        writer->pos += spec->n_rpadding;
    }
    return 0;
}
示例#3
0
static int
format_complex_internal(PyObject *value,
                        const InternalFormatSpec *format,
                        _PyUnicodeWriter *writer)
{
    double re;
    double im;
    char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */
    char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */

    InternalFormatSpec tmp_format = *format;
    Py_ssize_t n_re_digits;
    Py_ssize_t n_im_digits;
    Py_ssize_t n_re_remainder;
    Py_ssize_t n_im_remainder;
    Py_ssize_t n_re_total;
    Py_ssize_t n_im_total;
    int re_has_decimal;
    int im_has_decimal;
    int precision, default_precision = 6;
    Py_UCS4 type = format->type;
    Py_ssize_t i_re;
    Py_ssize_t i_im;
    NumberFieldWidths re_spec;
    NumberFieldWidths im_spec;
    int flags = 0;
    int result = -1;
    Py_UCS4 maxchar = 127;
    enum PyUnicode_Kind rkind;
    void *rdata;
    Py_UCS4 re_sign_char = '\0';
    Py_UCS4 im_sign_char = '\0';
    int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
    int im_float_type;
    int add_parens = 0;
    int skip_re = 0;
    Py_ssize_t lpad;
    Py_ssize_t rpad;
    Py_ssize_t total;
    PyObject *re_unicode_tmp = NULL;
    PyObject *im_unicode_tmp = NULL;

    /* Locale settings, either from the actual locale or
       from a hard-code pseudo-locale */
    LocaleInfo locale = STATIC_LOCALE_INFO_INIT;

    if (format->precision > INT_MAX) {
        PyErr_SetString(PyExc_ValueError, "precision too big");
        goto done;
    }
    precision = (int)format->precision;

    /* Zero padding is not allowed. */
    if (format->fill_char == '0') {
        PyErr_SetString(PyExc_ValueError,
                        "Zero padding is not allowed in complex format "
                        "specifier");
        goto done;
    }

    /* Neither is '=' alignment . */
    if (format->align == '=') {
        PyErr_SetString(PyExc_ValueError,
                        "'=' alignment flag is not allowed in complex format "
                        "specifier");
        goto done;
    }

    re = PyComplex_RealAsDouble(value);
    if (re == -1.0 && PyErr_Occurred())
        goto done;
    im = PyComplex_ImagAsDouble(value);
    if (im == -1.0 && PyErr_Occurred())
        goto done;

    if (format->alternate)
        flags |= Py_DTSF_ALT;

    if (type == '\0') {
        /* Omitted type specifier. Should be like str(self). */
        type = 'r';
        default_precision = 0;
        if (re == 0.0 && copysign(1.0, re) == 1.0)
            skip_re = 1;
        else
            add_parens = 1;
    }

    if (type == 'n')
        /* 'n' is the same as 'g', except for the locale used to
           format the result. We take care of that later. */
        type = 'g';

    if (precision < 0)
        precision = default_precision;
    else if (type == 'r')
        type = 'g';

    /* Cast "type", because if we're in unicode we need to pass a
       8-bit char. This is safe, because we've restricted what "type"
       can be. */
    re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
                                   &re_float_type);
    if (re_buf == NULL)
        goto done;
    im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
                                   &im_float_type);
    if (im_buf == NULL)
        goto done;

    n_re_digits = strlen(re_buf);
    n_im_digits = strlen(im_buf);

    /* Since there is no unicode version of PyOS_double_to_string,
       just use the 8 bit version and then convert to unicode. */
    re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
    if (re_unicode_tmp == NULL)
        goto done;
    i_re = 0;

    im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
    if (im_unicode_tmp == NULL)
        goto done;
    i_im = 0;

    /* Is a sign character present in the output?  If so, remember it
       and skip it */
    if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
        re_sign_char = '-';
        ++i_re;
        --n_re_digits;
    }
    if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
        im_sign_char = '-';
        ++i_im;
        --n_im_digits;
    }

    /* Determine if we have any "remainder" (after the digits, might include
       decimal or exponent or both (or neither)) */
    parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
                 &n_re_remainder, &re_has_decimal);
    parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
                 &n_im_remainder, &im_has_decimal);

    /* Determine the grouping, separator, and decimal point, if any. */
    if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
                        (format->thousands_separators ?
                         LT_DEFAULT_LOCALE :
                         LT_NO_LOCALE),
                        &locale) == -1)
        goto done;

    /* Turn off any padding. We'll do it later after we've composed
       the numbers without padding. */
    tmp_format.fill_char = '\0';
    tmp_format.align = '<';
    tmp_format.width = -1;

    /* Calculate how much memory we'll need. */
    n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
                                    i_re, i_re + n_re_digits, n_re_remainder,
                                    re_has_decimal, &locale, &tmp_format,
                                    &maxchar);

    /* Same formatting, but always include a sign, unless the real part is
     * going to be omitted, in which case we use whatever sign convention was
     * requested by the original format. */
    if (!skip_re)
        tmp_format.sign = '+';
    n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
                                    i_im, i_im + n_im_digits, n_im_remainder,
                                    im_has_decimal, &locale, &tmp_format,
                                    &maxchar);

    if (skip_re)
        n_re_total = 0;

    /* Add 1 for the 'j', and optionally 2 for parens. */
    calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
                 format->width, format->align, &lpad, &rpad, &total);

    if (lpad || rpad)
        maxchar = Py_MAX(maxchar, format->fill_char);

    if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
        goto done;
    rkind = writer->kind;
    rdata = writer->data;

    /* Populate the memory. First, the padding. */
    result = fill_padding(writer,
                          n_re_total + n_im_total + 1 + add_parens * 2,
                          format->fill_char, lpad, rpad);
    if (result == -1)
        goto done;

    if (add_parens) {
        PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
        writer->pos++;
    }

    if (!skip_re) {
        result = fill_number(writer, &re_spec,
                             re_unicode_tmp, i_re, i_re + n_re_digits,
                             NULL, 0,
                             0,
                             &locale, 0);
        if (result == -1)
            goto done;
    }
    result = fill_number(writer, &im_spec,
                         im_unicode_tmp, i_im, i_im + n_im_digits,
                         NULL, 0,
                         0,
                         &locale, 0);
    if (result == -1)
        goto done;
    PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
    writer->pos++;

    if (add_parens) {
        PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
        writer->pos++;
    }

    writer->pos += rpad;

done:
    PyMem_Free(re_buf);
    PyMem_Free(im_buf);
    Py_XDECREF(re_unicode_tmp);
    Py_XDECREF(im_unicode_tmp);
    free_locale_info(&locale);
    return result;
}
示例#4
0
// Convert a QString to a Python Unicode object.
PyObject *qpycore_PyObject_FromQString(const QString &qstr)
{
    PyObject *obj;

#if defined(PYQT_PEP_393)
    // We have to work out exactly which kind to use.  We assume ASCII while we
    // are checking so that we only go through the string once is the most
    // common case.  Note that we can't use PyUnicode_FromKindAndData() because
    // it doesn't handle surrogates in UCS2 strings.
    int py_len = qstr.length();

    if ((obj = PyUnicode_New(py_len, 0x007f)) == NULL)
        return NULL;

    int kind = PyUnicode_KIND(obj);
    void *data = PyUnicode_DATA(obj);
    const QChar *qch = qstr.data();

    for (int i = 0; i < py_len; ++i)
    {
        ushort uch = qch->unicode();

        if (uch > 0x007f)
        {
            // This is useless.
            Py_DECREF(obj);

            // Work out what kind we really need and what the Python length
            // should be.
            Py_UCS4 maxchar = 0x00ff;

            do
            {
                if (uch > 0x00ff)
                {
                    if (maxchar == 0x00ff)
                        maxchar = 0x00ffff;

                    // See if this is a surrogate pair.  We don't need to
                    // bounds check because Qt puts a null QChar on the end.
                    if (qch->isHighSurrogate() && (qch + 1)->isLowSurrogate())
                    {
                        maxchar = 0x10ffff;
                        --py_len;
                        ++qch;
                    }
                }

                uch = (++qch)->unicode();
            }
            while (!qch->isNull());

            // Create the correctly sized object.
            if ((obj = PyUnicode_New(py_len, maxchar)) == NULL)
                return NULL;

            kind = PyUnicode_KIND(obj);
            data = PyUnicode_DATA(obj);
            qch = qstr.data();

            for (int py_i = 0; py_i < py_len; ++py_i)
            {
                Py_UCS4 py_ch;

                if (qch->isHighSurrogate() && (qch + 1)->isLowSurrogate())
                {
                    py_ch = QChar::surrogateToUcs4(*qch, *(qch + 1));
                    ++qch;
                }
                else
                {
                    py_ch = qch->unicode();
                }

                ++qch;

                PyUnicode_WRITE(kind, data, py_i, py_ch);
            }

            break;
        }

        ++qch;

        PyUnicode_WRITE(kind, data, i, uch);
    }
#elif defined(Py_UNICODE_WIDE)
    QVector<uint> ucs4 = qstr.toUcs4();

    if ((obj = PyUnicode_FromUnicode(NULL, ucs4.size())) == NULL)
        return NULL;

    memcpy(PyUnicode_AS_UNICODE(obj), ucs4.constData(),
            ucs4.size() * sizeof (Py_UNICODE));
#else
    if ((obj = PyUnicode_FromUnicode(NULL, qstr.length())) == NULL)
        return NULL;

    memcpy(PyUnicode_AS_UNICODE(obj), qstr.utf16(),
            qstr.length() * sizeof (Py_UNICODE));
#endif

    return obj;
}
/* define unicode version of xmlescape */
static PyObject *xmlescape_str(PyObject *str, int doquot, int doapos)
{
	Py_ssize_t oldsize;
	void *olddata;
	int maxchar = 127;
	Py_ssize_t i;
	Py_ssize_t newsize = 0;
	void *newdata;

	int kind = PyUnicode_KIND(str);

	oldsize = PyUnicode_GET_LENGTH(str);
	olddata = PyUnicode_DATA(str);
	for (i = 0; i < oldsize; ++i)
	{
		Py_UCS4 ch = PyUnicode_READ(kind, olddata, i);
		if (ch == ((Py_UCS4)'<'))
			newsize += 4; /* &lt; */
		else if (ch == (Py_UCS4)'>') /* Note that we always replace '>' with its entity, not just in case it is part of ']]>' */
			newsize += 4; /* &gt; */
		else if (ch == (Py_UCS4)'&')
			newsize += 5; /* &amp; */
		else if ((ch == (Py_UCS4)'"') && doquot)
			newsize += 6; /* &quot; */
		else if ((ch == (Py_UCS4)'\'') && doapos)
			newsize += 5; /* &#39; */
		else if (ch <= 0x8)
			newsize += 4;
		else if ((ch >= 0xB) && (ch <= 0x1F) && (ch != 0xD))
			newsize += 5;
		else if ((ch >= 0x7F) && (ch <= 0x9F) && (ch != 0x85))
			newsize += 6;
		else
		{
			newsize++;
			if (ch > maxchar)
				maxchar = ch;
		}
	}
	if (oldsize==newsize)
	{
		/* nothing to replace => return original */
		Py_INCREF(str);
		return str;
	}
	else
	{
		int index = 0;
		PyObject *result = PyUnicode_New(newsize, maxchar);
		newdata = PyUnicode_DATA(result);
		if (result == NULL)
			return NULL;
		for (i = 0; i < oldsize; ++i)
		{
			Py_UCS4 ch = PyUnicode_READ(kind, olddata, i);
			if (ch == (Py_UCS4)'<')
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, 'l');
				PyUnicode_WRITE(kind, newdata, index++, 't');
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if (ch == (Py_UCS4)'>')
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, 'g');
				PyUnicode_WRITE(kind, newdata, index++, 't');
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if (ch == (Py_UCS4)'&')
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, 'a');
				PyUnicode_WRITE(kind, newdata, index++, 'm');
				PyUnicode_WRITE(kind, newdata, index++, 'p');
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if ((ch == (Py_UCS4)'"') && doquot)
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, 'q');
				PyUnicode_WRITE(kind, newdata, index++, 'u');
				PyUnicode_WRITE(kind, newdata, index++, 'o');
				PyUnicode_WRITE(kind, newdata, index++, 't');
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if ((ch == (Py_UCS4)'\'') && doapos)
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, '#');
				PyUnicode_WRITE(kind, newdata, index++, '3');
				PyUnicode_WRITE(kind, newdata, index++, '9');
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if (ch <= 0x8)
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, '#');
				PyUnicode_WRITE(kind, newdata, index++, '0'+ch);
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if ((ch >= 0xB) && (ch <= 0x1F) && (ch != 0xD))
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, '#');
				PyUnicode_WRITE(kind, newdata, index++, '0'+ch/10);
				PyUnicode_WRITE(kind, newdata, index++, '0'+ch%10);
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else if ((ch >= 0x7F) && (ch <= 0x9F) && (ch != 0x85))
			{
				PyUnicode_WRITE(kind, newdata, index++, '&');
				PyUnicode_WRITE(kind, newdata, index++, '#');
				PyUnicode_WRITE(kind, newdata, index++, '0'+ch/100);
				PyUnicode_WRITE(kind, newdata, index++, '0'+(ch/10)%10);
				PyUnicode_WRITE(kind, newdata, index++, '0'+ch%10);
				PyUnicode_WRITE(kind, newdata, index++, ';');
			}
			else
				PyUnicode_WRITE(kind, newdata, index++, ch);
		}
		return result;
	}
}