/* "replace" codec error handler.
 *
 * For a UnicodeEncodeError or UnicodeTranslateError the unencodable range
 * [start, end) is replaced by a string of the same length made of '?'
 * (encode) or U+FFFD (translate).  For a UnicodeDecodeError a single U+FFFD
 * is produced.  In every case the result is a tuple
 * (replacement, position_to_resume_at), where the position is the error's
 * `end`.
 *
 * Returns NULL with an exception set on failure, including when `exc` is
 * none of the three supported exception types.
 */
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
{
    Py_ssize_t first, last, count, pos;
    Py_UCS4 filler;
    PyObject *replacement;
    int kind;
    void *data;

    if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
        if (PyUnicodeEncodeError_GetStart(exc, &first)) {
            return NULL;
        }
        if (PyUnicodeEncodeError_GetEnd(exc, &last)) {
            return NULL;
        }
        filler = '?';
    }
    else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
        /* Decoding always yields exactly one replacement character. */
        if (PyUnicodeDecodeError_GetEnd(exc, &last)) {
            return NULL;
        }
        return Py_BuildValue("(Cn)",
                             (int)Py_UNICODE_REPLACEMENT_CHARACTER,
                             last);
    }
    else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
        if (PyUnicodeTranslateError_GetStart(exc, &first)) {
            return NULL;
        }
        if (PyUnicodeTranslateError_GetEnd(exc, &last)) {
            return NULL;
        }
        filler = Py_UNICODE_REPLACEMENT_CHARACTER;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }

    /* Shared tail for the encode and translate cases: build a string of
       `count` copies of the filler character. */
    count = last - first;
    replacement = PyUnicode_New(count, filler);
    if (replacement == NULL) {
        return NULL;
    }
    kind = PyUnicode_KIND(replacement);
    data = PyUnicode_DATA(replacement);
    pos = 0;
    while (pos < count) {
        PyUnicode_WRITE(kind, data, pos, filler);
        pos++;
    }
    assert(_PyUnicode_CheckConsistency(replacement, 1));
    /* "N" hands our reference to the replacement string to the tuple. */
    return Py_BuildValue("(Nn)", replacement, last);
}
/* Fill in the digit parts of a numbers's string representation, as determined in calc_number_widths(). Return -1 on error, or 0 on success. */ static int fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end, PyObject *prefix, Py_ssize_t p_start, Py_UCS4 fill_char, LocaleInfo *locale, int toupper) { /* Used to keep track of digits, decimal, and remainder. */ Py_ssize_t d_pos = d_start; const unsigned int kind = writer->kind; const void *data = writer->data; Py_ssize_t r; if (spec->n_lpadding) { _PyUnicode_FastFill(writer->buffer, writer->pos, spec->n_lpadding, fill_char); writer->pos += spec->n_lpadding; } if (spec->n_sign == 1) { PyUnicode_WRITE(kind, data, writer->pos, spec->sign); writer->pos++; } if (spec->n_prefix) { _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, prefix, p_start, spec->n_prefix); if (toupper) { Py_ssize_t t; for (t = 0; t < spec->n_prefix; t++) { Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t); c = Py_TOUPPER(c); assert (c <= 127); PyUnicode_WRITE(kind, data, writer->pos + t, c); } } writer->pos += spec->n_prefix; } if (spec->n_spadding) { _PyUnicode_FastFill(writer->buffer, writer->pos, spec->n_spadding, fill_char); writer->pos += spec->n_spadding; } /* Only for type 'c' special case, it has no digits. */ if (spec->n_digits != 0) { /* Fill the digits with InsertThousandsGrouping. 
*/ char *pdigits; if (PyUnicode_READY(digits)) return -1; pdigits = PyUnicode_DATA(digits); if (PyUnicode_KIND(digits) < kind) { pdigits = _PyUnicode_AsKind(digits, kind); if (pdigits == NULL) return -1; } r = _PyUnicode_InsertThousandsGrouping( writer->buffer, writer->pos, spec->n_grouped_digits, pdigits + kind * d_pos, spec->n_digits, spec->n_min_width, locale->grouping, locale->thousands_sep, NULL); if (r == -1) return -1; assert(r == spec->n_grouped_digits); if (PyUnicode_KIND(digits) < kind) PyMem_Free(pdigits); d_pos += spec->n_digits; } if (toupper) { Py_ssize_t t; for (t = 0; t < spec->n_grouped_digits; t++) { Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t); c = Py_TOUPPER(c); if (c > 127) { PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit"); return -1; } PyUnicode_WRITE(kind, data, writer->pos + t, c); } } writer->pos += spec->n_grouped_digits; if (spec->n_decimal) { _PyUnicode_FastCopyCharacters( writer->buffer, writer->pos, locale->decimal_point, 0, spec->n_decimal); writer->pos += spec->n_decimal; d_pos += 1; } if (spec->n_remainder) { _PyUnicode_FastCopyCharacters( writer->buffer, writer->pos, digits, d_pos, spec->n_remainder); writer->pos += spec->n_remainder; /* d_pos += spec->n_remainder; */ } if (spec->n_rpadding) { _PyUnicode_FastFill(writer->buffer, writer->pos, spec->n_rpadding, fill_char); writer->pos += spec->n_rpadding; } return 0; }
/* Format the complex number `value` according to `format` and write the
   result through `writer`.

   The real and imaginary parts are each converted with
   PyOS_double_to_string(), measured with calc_number_widths(), and then
   written with fill_number(); the pair is padded as a single field.  With
   no type code, a real part of positive zero is omitted, otherwise the
   output is wrapped in parentheses.

   Returns -1 with an exception set by this function or one of its callees
   on failure; otherwise returns the value of the last fill_number() call.
   All temporaries are released at the `done` label on every path. */
static int
format_complex_internal(PyObject *value,
                        const InternalFormatSpec *format,
                        _PyUnicodeWriter *writer)
{
    double re;
    double im;
    char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */
    char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */

    /* Private copy of the spec: padding and sign are overridden below. */
    InternalFormatSpec tmp_format = *format;
    Py_ssize_t n_re_digits;
    Py_ssize_t n_im_digits;
    Py_ssize_t n_re_remainder;
    Py_ssize_t n_im_remainder;
    Py_ssize_t n_re_total;
    Py_ssize_t n_im_total;
    int re_has_decimal;
    int im_has_decimal;
    int precision, default_precision = 6;
    Py_UCS4 type = format->type;
    Py_ssize_t i_re;
    Py_ssize_t i_im;
    NumberFieldWidths re_spec;
    NumberFieldWidths im_spec;
    int flags = 0;
    /* Pessimistic default: every early `goto done` reports failure. */
    int result = -1;
    Py_UCS4 maxchar = 127;
    enum PyUnicode_Kind rkind;
    void *rdata;
    Py_UCS4 re_sign_char = '\0';
    Py_UCS4 im_sign_char = '\0';
    int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
    int im_float_type;
    int add_parens = 0;
    int skip_re = 0;
    Py_ssize_t lpad;
    Py_ssize_t rpad;
    Py_ssize_t total;
    PyObject *re_unicode_tmp = NULL;
    PyObject *im_unicode_tmp = NULL;

    /* Locale settings, either from the actual locale or
       from a hard-coded pseudo-locale */
    LocaleInfo locale = STATIC_LOCALE_INFO_INIT;

    /* precision is narrowed to int below, so reject values that do not fit. */
    if (format->precision > INT_MAX) {
        PyErr_SetString(PyExc_ValueError, "precision too big");
        goto done;
    }
    precision = (int)format->precision;

    /* Zero padding is not allowed. */
    if (format->fill_char == '0') {
        PyErr_SetString(PyExc_ValueError,
                        "Zero padding is not allowed in complex format "
                        "specifier");
        goto done;
    }

    /* Neither is '=' alignment. */
    if (format->align == '=') {
        PyErr_SetString(PyExc_ValueError,
                        "'=' alignment flag is not allowed in complex format "
                        "specifier");
        goto done;
    }

    /* -1.0 is a legitimate value, so an error is only signalled when an
       exception is also pending. */
    re = PyComplex_RealAsDouble(value);
    if (re == -1.0 && PyErr_Occurred())
        goto done;
    im = PyComplex_ImagAsDouble(value);
    if (im == -1.0 && PyErr_Occurred())
        goto done;

    if (format->alternate)
        flags |= Py_DTSF_ALT;

    if (type == '\0') {
        /* Omitted type specifier. Should be like str(self). */
        type = 'r';
        default_precision = 0;
        /* copysign() distinguishes +0.0 from -0.0: only a positive-zero
           real part is dropped; anything else gets parentheses. */
        if (re == 0.0 && copysign(1.0, re) == 1.0)
            skip_re = 1;
        else
            add_parens = 1;
    }

    if (type == 'n')
        /* 'n' is the same as 'g', except for the locale used to
           format the result. We take care of that later. */
        type = 'g';

    /* A negative precision selects the default for the type.  With an
       explicit precision, the 'r' chosen above is replaced by 'g'. */
    if (precision < 0)
        precision = default_precision;
    else if (type == 'r')
        type = 'g';

    /* Cast "type", because if we're in unicode we need to pass a
       8-bit char. This is safe, because we've restricted what "type"
       can be. */
    re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
                                   &re_float_type);
    if (re_buf == NULL)
        goto done;
    im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
                                   &im_float_type);
    if (im_buf == NULL)
        goto done;

    n_re_digits = strlen(re_buf);
    n_im_digits = strlen(im_buf);

    /* Since there is no unicode version of PyOS_double_to_string,
       just use the 8 bit version and then convert to unicode. */
    re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
    if (re_unicode_tmp == NULL)
        goto done;
    i_re = 0;

    im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
    if (im_unicode_tmp == NULL)
        goto done;
    i_im = 0;

    /* Is a sign character present in the output?  If so, remember it
       and skip it */
    if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
        re_sign_char = '-';
        ++i_re;
        --n_re_digits;
    }
    if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
        im_sign_char = '-';
        ++i_im;
        --n_im_digits;
    }

    /* Determine if we have any "remainder" (after the digits, might include
       decimal or exponent or both (or neither)) */
    parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
                 &n_re_remainder, &re_has_decimal);
    parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
                 &n_im_remainder, &im_has_decimal);

    /* Determine the grouping, separator, and decimal point, if any.
       Note this tests format->type, not the local `type`, which has
       already been rewritten from 'n' to 'g' above. */
    if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
                        (format->thousands_separators ?
                         LT_DEFAULT_LOCALE :
                         LT_NO_LOCALE),
                        &locale) == -1)
        goto done;

    /* Turn off any padding. We'll do it later after we've composed
       the numbers without padding. */
    tmp_format.fill_char = '\0';
    tmp_format.align = '<';
    tmp_format.width = -1;

    /* Calculate how much memory we'll need. */
    n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
                                    i_re, i_re + n_re_digits, n_re_remainder,
                                    re_has_decimal, &locale, &tmp_format,
                                    &maxchar);

    /* Same formatting, but always include a sign, unless the real part is
     * going to be omitted, in which case we use whatever sign convention was
     * requested by the original format. */
    if (!skip_re)
        tmp_format.sign = '+';
    n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
                                    i_im, i_im + n_im_digits, n_im_remainder,
                                    im_has_decimal, &locale, &tmp_format,
                                    &maxchar);

    /* An omitted real part contributes nothing to the field width. */
    if (skip_re)
        n_re_total = 0;

    /* Add 1 for the 'j', and optionally 2 for parens. */
    calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
                 format->width, format->align, &lpad, &rpad, &total);

    /* The fill character only matters for the output's character range
       when some padding will actually be written. */
    if (lpad || rpad)
        maxchar = Py_MAX(maxchar, format->fill_char);

    if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
        goto done;
    /* Read kind/data only after Prepare.
       NOTE(review): presumably Prepare may reallocate the buffer - confirm. */
    rkind = writer->kind;
    rdata = writer->data;

    /* Populate the memory. First, the padding. */
    result = fill_padding(writer,
                          n_re_total + n_im_total + 1 + add_parens * 2,
                          format->fill_char, lpad, rpad);
    if (result == -1)
        goto done;

    if (add_parens) {
        PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
        writer->pos++;
    }

    /* No prefix (NULL, 0), a fill character of 0 and no upper-casing are
       passed for both parts. */
    if (!skip_re) {
        result = fill_number(writer, &re_spec,
                             re_unicode_tmp, i_re, i_re + n_re_digits,
                             NULL, 0,
                             0,
                             &locale, 0);
        if (result == -1)
            goto done;
    }
    result = fill_number(writer, &im_spec,
                         im_unicode_tmp, i_im, i_im + n_im_digits,
                         NULL, 0,
                         0,
                         &locale, 0);
    if (result == -1)
        goto done;
    PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
    writer->pos++;

    if (add_parens) {
        PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
        writer->pos++;
    }

    /* Step over the right padding.
       NOTE(review): presumably fill_padding() already wrote it - confirm. */
    writer->pos += rpad;

done:
    /* Common exit: every pointer here is either NULL or owned by us. */
    PyMem_Free(re_buf);
    PyMem_Free(im_buf);
    Py_XDECREF(re_unicode_tmp);
    Py_XDECREF(im_unicode_tmp);
    free_locale_info(&locale);
    return result;
}
// Convert a QString to a Python Unicode object. PyObject *qpycore_PyObject_FromQString(const QString &qstr) { PyObject *obj; #if defined(PYQT_PEP_393) // We have to work out exactly which kind to use. We assume ASCII while we // are checking so that we only go through the string once is the most // common case. Note that we can't use PyUnicode_FromKindAndData() because // it doesn't handle surrogates in UCS2 strings. int py_len = qstr.length(); if ((obj = PyUnicode_New(py_len, 0x007f)) == NULL) return NULL; int kind = PyUnicode_KIND(obj); void *data = PyUnicode_DATA(obj); const QChar *qch = qstr.data(); for (int i = 0; i < py_len; ++i) { ushort uch = qch->unicode(); if (uch > 0x007f) { // This is useless. Py_DECREF(obj); // Work out what kind we really need and what the Python length // should be. Py_UCS4 maxchar = 0x00ff; do { if (uch > 0x00ff) { if (maxchar == 0x00ff) maxchar = 0x00ffff; // See if this is a surrogate pair. We don't need to // bounds check because Qt puts a null QChar on the end. if (qch->isHighSurrogate() && (qch + 1)->isLowSurrogate()) { maxchar = 0x10ffff; --py_len; ++qch; } } uch = (++qch)->unicode(); } while (!qch->isNull()); // Create the correctly sized object. 
if ((obj = PyUnicode_New(py_len, maxchar)) == NULL) return NULL; kind = PyUnicode_KIND(obj); data = PyUnicode_DATA(obj); qch = qstr.data(); for (int py_i = 0; py_i < py_len; ++py_i) { Py_UCS4 py_ch; if (qch->isHighSurrogate() && (qch + 1)->isLowSurrogate()) { py_ch = QChar::surrogateToUcs4(*qch, *(qch + 1)); ++qch; } else { py_ch = qch->unicode(); } ++qch; PyUnicode_WRITE(kind, data, py_i, py_ch); } break; } ++qch; PyUnicode_WRITE(kind, data, i, uch); } #elif defined(Py_UNICODE_WIDE) QVector<uint> ucs4 = qstr.toUcs4(); if ((obj = PyUnicode_FromUnicode(NULL, ucs4.size())) == NULL) return NULL; memcpy(PyUnicode_AS_UNICODE(obj), ucs4.constData(), ucs4.size() * sizeof (Py_UNICODE)); #else if ((obj = PyUnicode_FromUnicode(NULL, qstr.length())) == NULL) return NULL; memcpy(PyUnicode_AS_UNICODE(obj), qstr.utf16(), qstr.length() * sizeof (Py_UNICODE)); #endif return obj; }
/* define unicode version of xmlescape */ static PyObject *xmlescape_str(PyObject *str, int doquot, int doapos) { Py_ssize_t oldsize; void *olddata; int maxchar = 127; Py_ssize_t i; Py_ssize_t newsize = 0; void *newdata; int kind = PyUnicode_KIND(str); oldsize = PyUnicode_GET_LENGTH(str); olddata = PyUnicode_DATA(str); for (i = 0; i < oldsize; ++i) { Py_UCS4 ch = PyUnicode_READ(kind, olddata, i); if (ch == ((Py_UCS4)'<')) newsize += 4; /* < */ else if (ch == (Py_UCS4)'>') /* Note that we always replace '>' with its entity, not just in case it is part of ']]>' */ newsize += 4; /* > */ else if (ch == (Py_UCS4)'&') newsize += 5; /* & */ else if ((ch == (Py_UCS4)'"') && doquot) newsize += 6; /* " */ else if ((ch == (Py_UCS4)'\'') && doapos) newsize += 5; /* ' */ else if (ch <= 0x8) newsize += 4; else if ((ch >= 0xB) && (ch <= 0x1F) && (ch != 0xD)) newsize += 5; else if ((ch >= 0x7F) && (ch <= 0x9F) && (ch != 0x85)) newsize += 6; else { newsize++; if (ch > maxchar) maxchar = ch; } } if (oldsize==newsize) { /* nothing to replace => return original */ Py_INCREF(str); return str; } else { int index = 0; PyObject *result = PyUnicode_New(newsize, maxchar); newdata = PyUnicode_DATA(result); if (result == NULL) return NULL; for (i = 0; i < oldsize; ++i) { Py_UCS4 ch = PyUnicode_READ(kind, olddata, i); if (ch == (Py_UCS4)'<') { PyUnicode_WRITE(kind, newdata, index++, '&'); PyUnicode_WRITE(kind, newdata, index++, 'l'); PyUnicode_WRITE(kind, newdata, index++, 't'); PyUnicode_WRITE(kind, newdata, index++, ';'); } else if (ch == (Py_UCS4)'>') { PyUnicode_WRITE(kind, newdata, index++, '&'); PyUnicode_WRITE(kind, newdata, index++, 'g'); PyUnicode_WRITE(kind, newdata, index++, 't'); PyUnicode_WRITE(kind, newdata, index++, ';'); } else if (ch == (Py_UCS4)'&') { PyUnicode_WRITE(kind, newdata, index++, '&'); PyUnicode_WRITE(kind, newdata, index++, 'a'); PyUnicode_WRITE(kind, newdata, index++, 'm'); PyUnicode_WRITE(kind, newdata, index++, 'p'); PyUnicode_WRITE(kind, newdata, index++, 
';'); } else if ((ch == (Py_UCS4)'"') && doquot) { PyUnicode_WRITE(kind, newdata, index++, '&'); PyUnicode_WRITE(kind, newdata, index++, 'q'); PyUnicode_WRITE(kind, newdata, index++, 'u'); PyUnicode_WRITE(kind, newdata, index++, 'o'); PyUnicode_WRITE(kind, newdata, index++, 't'); PyUnicode_WRITE(kind, newdata, index++, ';'); } else if ((ch == (Py_UCS4)'\'') && doapos) { PyUnicode_WRITE(kind, newdata, index++, '&'); PyUnicode_WRITE(kind, newdata, index++, '#'); PyUnicode_WRITE(kind, newdata, index++, '3'); PyUnicode_WRITE(kind, newdata, index++, '9'); PyUnicode_WRITE(kind, newdata, index++, ';'); } else if (ch <= 0x8) { PyUnicode_WRITE(kind, newdata, index++, '&'); PyUnicode_WRITE(kind, newdata, index++, '#'); PyUnicode_WRITE(kind, newdata, index++, '0'+ch); PyUnicode_WRITE(kind, newdata, index++, ';'); } else if ((ch >= 0xB) && (ch <= 0x1F) && (ch != 0xD)) { PyUnicode_WRITE(kind, newdata, index++, '&'); PyUnicode_WRITE(kind, newdata, index++, '#'); PyUnicode_WRITE(kind, newdata, index++, '0'+ch/10); PyUnicode_WRITE(kind, newdata, index++, '0'+ch%10); PyUnicode_WRITE(kind, newdata, index++, ';'); } else if ((ch >= 0x7F) && (ch <= 0x9F) && (ch != 0x85)) { PyUnicode_WRITE(kind, newdata, index++, '&'); PyUnicode_WRITE(kind, newdata, index++, '#'); PyUnicode_WRITE(kind, newdata, index++, '0'+ch/100); PyUnicode_WRITE(kind, newdata, index++, '0'+(ch/10)%10); PyUnicode_WRITE(kind, newdata, index++, '0'+ch%10); PyUnicode_WRITE(kind, newdata, index++, ';'); } else PyUnicode_WRITE(kind, newdata, index++, ch); } return result; } }