/* Derive a module name from a filename by dropping a trailing ".py".
   An empty filename maps to "<unknown>".
   Returns a new reference, or NULL if the length cannot be obtained. */
static PyObject *
normalize_module(PyObject *filename)
{
    Py_ssize_t length;
    int kind;
    void *data;
    int has_py_suffix;

    length = PyUnicode_GetLength(filename);
    if (length < 0) {
        return NULL;
    }
    if (length == 0) {
        return PyUnicode_FromString("<unknown>");
    }

    kind = PyUnicode_KIND(filename);
    data = PyUnicode_DATA(filename);

    /* Equivalent of filename.endswith(".py"). */
    has_py_suffix = length >= 3
        && PyUnicode_READ(kind, data, length - 3) == '.'
        && PyUnicode_READ(kind, data, length - 2) == 'p'
        && PyUnicode_READ(kind, data, length - 1) == 'y';

    if (!has_py_suffix) {
        /* No suffix to strip: hand back the filename itself. */
        Py_INCREF(filename);
        return filename;
    }
    return PyUnicode_Substring(filename, 0, length - 3);
}
/* Produce the next parsed record from the reader's input iterator.
 *
 * Lines are pulled from self->input_iter and fed one character at a time to
 * parse_process_char(), followed by a 0 character after each line, until the
 * parser state is START_RECORD again.
 *
 * Returns the object held in self->fields (self->fields is reset to NULL, so
 * the reference moves to the caller).  Returns NULL with an exception set on
 * error, and NULL without an exception when the input iterator is exhausted
 * and no partial field is pending.
 */
static PyObject *
Reader_iternext(ReaderObj *self)
{
    PyObject *fields = NULL;
    Py_UCS4 c;
    Py_ssize_t pos, linelen;
    unsigned int kind;
    void *data;
    PyObject *lineobj;

    if (parse_reset(self) < 0)
        return NULL;
    do {
        lineobj = PyIter_Next(self->input_iter);
        if (lineobj == NULL) {
            /* End of input OR exception */
            if (!PyErr_Occurred() && self->field_len != 0)
                PyErr_Format(error_obj, "newline inside string");
            return NULL;
        }
        if (!PyUnicode_Check(lineobj)) {
            PyErr_Format(error_obj,
                         "iterator should return strings, "
                         "not %.200s "
                         "(did you open the file in text mode?)",
                         lineobj->ob_type->tp_name
                );
            Py_DECREF(lineobj);
            return NULL;
        }
        /* The KIND/DATA/READ macros below require the canonical (ready)
           representation; a string coming from an arbitrary iterator is not
           guaranteed to be ready yet. */
        if (PyUnicode_READY(lineobj) == -1) {
            Py_DECREF(lineobj);
            return NULL;
        }
        ++self->line_num;
        kind = PyUnicode_KIND(lineobj);
        data = PyUnicode_DATA(lineobj);
        linelen = PyUnicode_GET_LENGTH(lineobj);
        for (pos = 0; pos < linelen; pos++) {
            c = PyUnicode_READ(kind, data, pos);
            if (c == '\0') {
                Py_DECREF(lineobj);
                PyErr_Format(error_obj,
                             "line contains NULL byte");
                goto err;
            }
            if (parse_process_char(self, c) < 0) {
                Py_DECREF(lineobj);
                goto err;
            }
        }
        Py_DECREF(lineobj);
        /* A 0 character is passed to the parser after every input line. */
        if (parse_process_char(self, 0) < 0)
            goto err;
    } while (self->state != START_RECORD);

    fields = self->fields;
    self->fields = NULL;
err:
    return fields;
}
/* Internal helper: fetch the code point stored at `index` in the tokenizer
   input.  With PEP_393 defined the value is read through PyUnicode_READ using
   the input's kind/data pair; otherwise it is taken from text->buf. */
static Unicode
read_codepoint(TokenizerInput* text, Py_ssize_t index)
{
    Unicode codepoint;

#ifdef PEP_393
    codepoint = PyUnicode_READ(text->kind, text->data, index);
#else
    codepoint = text->buf[index];
#endif
    return codepoint;
}
/* Append the dialect's line terminator to the writer's record buffer.
   Returns 1 on success and 0 on failure (the length check fails or
   join_check_rec_size() reports that the buffer could not be sized). */
static int
join_append_lineterminator(WriterObj *self)
{
    Py_ssize_t count, offset;
    unsigned int kind;
    void *chars;

    count = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
    if (count == -1) {
        return 0;
    }

    /* Make sure the record buffer can hold the extra characters. */
    if (!join_check_rec_size(self, self->rec_len + count)) {
        return 0;
    }

    kind = PyUnicode_KIND(self->dialect->lineterminator);
    chars = PyUnicode_DATA(self->dialect->lineterminator);
    for (offset = 0; offset < count; offset++) {
        self->rec[self->rec_len + offset] =
            PyUnicode_READ(kind, chars, offset);
    }
    self->rec_len += count;

    return 1;
}
/* Gather the warning context for the frame selected by `stack_level`.
 *
 * On success returns 1 and stores new references in *filename, *module and
 * *registry, plus the line number in *lineno.  When no frame is found the
 * interpreter's sys dictionary is used as globals and *lineno is 1.
 *
 * Returns 0 on error (no new refs).
 */
static int
setup_context(Py_ssize_t stack_level, PyObject **filename, int *lineno,
              PyObject **module, PyObject **registry)
{
    PyObject *globals;

    /* Setup globals and lineno. */
    PyFrameObject *f = PyThreadState_GET()->frame;
    /* Stack level comparisons to Python code are off by one as there is no
       warnings-related stack level to avoid. */
    if (stack_level <= 0 || is_internal_frame(f)) {
        while (--stack_level > 0 && f != NULL) {
            f = f->f_back;
        }
    }
    else {
        while (--stack_level > 0 && f != NULL) {
            f = next_external_frame(f);
        }
    }

    if (f == NULL) {
        globals = PyThreadState_Get()->interp->sysdict;
        *lineno = 1;
    }
    else {
        globals = f->f_globals;
        *lineno = PyFrame_GetLineNumber(f);
    }

    *module = NULL;

    /* Setup registry. */
    assert(globals != NULL);
    assert(PyDict_Check(globals));
    *registry = PyDict_GetItemString(globals, "__warningregistry__");
    if (*registry == NULL) {
        int rc;

        *registry = PyDict_New();
        if (*registry == NULL)
            return 0;

        rc = PyDict_SetItemString(globals, "__warningregistry__", *registry);
        if (rc < 0)
            goto handle_error;
    }
    else
        Py_INCREF(*registry);

    /* Setup module. */
    *module = PyDict_GetItemString(globals, "__name__");
    if (*module == NULL) {
        *module = PyUnicode_FromString("<string>");
        if (*module == NULL)
            goto handle_error;
    }
    else
        Py_INCREF(*module);

    /* Setup filename. */
    *filename = PyDict_GetItemString(globals, "__file__");
    if (*filename != NULL && PyUnicode_Check(*filename)) {
        Py_ssize_t len;
        int kind;
        void *data;

        /* *filename is still a borrowed reference here, so bailing out
           does not leak it. */
        if (PyUnicode_READY(*filename))
            goto handle_error;

        len = PyUnicode_GetLength(*filename);
        kind = PyUnicode_KIND(*filename);
        data = PyUnicode_DATA(*filename);

#define ascii_lower(c) ((c <= 127) ? Py_TOLOWER(c) : 0)
        /* if filename.lower().endswith(".pyc"): */
        if (len >= 4 &&
            PyUnicode_READ(kind, data, len-4) == '.' &&
            ascii_lower(PyUnicode_READ(kind, data, len-3)) == 'p' &&
            ascii_lower(PyUnicode_READ(kind, data, len-2)) == 'y' &&
            ascii_lower(PyUnicode_READ(kind, data, len-1)) == 'c')
        {
            /* Drop the final character so ".pyc" becomes ".py". */
            *filename = PyUnicode_Substring(*filename, 0, len - 1);
            if (*filename == NULL)
                goto handle_error;
        }
        else
            Py_INCREF(*filename);
    }
    else {
        *filename = NULL;
        /* __name__ is user-controlled and may be any object; only compare
           it as a string when it actually is one, because
           PyUnicode_CompareWithASCIIString() requires a str argument. */
        if (*module != Py_None && PyUnicode_Check(*module) &&
            PyUnicode_CompareWithASCIIString(*module, "__main__") == 0) {
            PyObject *argv = _PySys_GetObjectId(&PyId_argv);
            /* PyList_Check() is needed because sys.argv is set to None during
               Python finalization */
            if (argv != NULL && PyList_Check(argv) && PyList_Size(argv) > 0) {
                int is_true;
                *filename = PyList_GetItem(argv, 0);
                Py_INCREF(*filename);
                /* If sys.argv[0] is false, then use '__main__'. */
                is_true = PyObject_IsTrue(*filename);
                if (is_true < 0) {
                    Py_DECREF(*filename);
                    goto handle_error;
                }
                else if (!is_true) {
                    Py_XSETREF(*filename, PyUnicode_FromString("__main__"));
                    if (*filename == NULL)
                        goto handle_error;
                }
            }
            else {
                /* embedded interpreters don't have sys.argv, see bug #839151 */
                *filename = PyUnicode_FromString("__main__");
                if (*filename == NULL)
                    goto handle_error;
            }
        }
        if (*filename == NULL) {
            /* Fall back to the module object itself as the filename. */
            *filename = *module;
            Py_INCREF(*filename);
        }
    }

    return 1;

 handle_error:
    /* filename not XDECREF'ed here as there is no way to jump here with a
       dangling reference. */
    Py_XDECREF(*registry);
    Py_XDECREF(*module);
    return 0;
}
/* Write a warning to sys.stderr as "filename:lineno: category: text\n",
 * followed by the offending source line.
 *
 * When `sourceline` is given it is printed with leading spaces, tabs and
 * form feeds removed; otherwise the line is looked up through
 * _Py_DisplaySourceLine().  Any exception raised while printing is cleared
 * before returning, and write failures simply stop the output.
 */
static void
show_warning(PyObject *filename, int lineno, PyObject *text,
             PyObject *category, PyObject *sourceline)
{
    PyObject *f_stderr;
    PyObject *name;
    char lineno_str[128];
    _Py_IDENTIFIER(__name__);

    PyOS_snprintf(lineno_str, sizeof(lineno_str), ":%d: ", lineno);

    name = _PyObject_GetAttrId(category, &PyId___name__);
    if (name == NULL)  /* XXX Can an object lack a '__name__' attribute? */
        goto error;

    f_stderr = _PySys_GetObjectId(&PyId_stderr);
    if (f_stderr == NULL) {
        fprintf(stderr, "lost sys.stderr\n");
        goto error;
    }

    /* Print "filename:lineno: category: text\n" */
    if (PyFile_WriteObject(filename, f_stderr, Py_PRINT_RAW) < 0)
        goto error;
    if (PyFile_WriteString(lineno_str, f_stderr) < 0)
        goto error;
    if (PyFile_WriteObject(name, f_stderr, Py_PRINT_RAW) < 0)
        goto error;
    if (PyFile_WriteString(": ", f_stderr) < 0)
        goto error;
    if (PyFile_WriteObject(text, f_stderr, Py_PRINT_RAW) < 0)
        goto error;
    if (PyFile_WriteString("\n", f_stderr) < 0)
        goto error;
    Py_CLEAR(name);

    /* Print the source line with its leading whitespace removed. */
    if (sourceline) {
        int kind;
        void *data;
        Py_ssize_t i, len;
        Py_UCS4 ch;
        PyObject *truncated;

        /* PyUnicode_READY() returns 0 on success and -1 on failure, so only
           a negative result is an error. */
        if (PyUnicode_READY(sourceline) < 0)
            goto error;

        kind = PyUnicode_KIND(sourceline);
        data = PyUnicode_DATA(sourceline);
        len = PyUnicode_GET_LENGTH(sourceline);
        for (i=0; i<len; i++) {
            ch = PyUnicode_READ(kind, data, i);
            if (ch != ' ' && ch != '\t' && ch != '\014')
                break;
        }

        truncated = PyUnicode_Substring(sourceline, i, len);
        if (truncated == NULL)
            goto error;

        /* Write the stripped copy, not the original line. */
        PyFile_WriteObject(truncated, f_stderr, Py_PRINT_RAW);
        Py_DECREF(truncated);
        PyFile_WriteString("\n", f_stderr);
    }
    else {
        _Py_DisplaySourceLine(f_stderr, filename, lineno, 2);
    }

error:
    Py_XDECREF(name);
    PyErr_Clear();
}
/* Fill in the digit parts of a numbers's string representation, as determined in calc_number_widths(). Return -1 on error, or 0 on success. */ static int fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end, PyObject *prefix, Py_ssize_t p_start, Py_UCS4 fill_char, LocaleInfo *locale, int toupper) { /* Used to keep track of digits, decimal, and remainder. */ Py_ssize_t d_pos = d_start; const unsigned int kind = writer->kind; const void *data = writer->data; Py_ssize_t r; if (spec->n_lpadding) { _PyUnicode_FastFill(writer->buffer, writer->pos, spec->n_lpadding, fill_char); writer->pos += spec->n_lpadding; } if (spec->n_sign == 1) { PyUnicode_WRITE(kind, data, writer->pos, spec->sign); writer->pos++; } if (spec->n_prefix) { _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, prefix, p_start, spec->n_prefix); if (toupper) { Py_ssize_t t; for (t = 0; t < spec->n_prefix; t++) { Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t); c = Py_TOUPPER(c); assert (c <= 127); PyUnicode_WRITE(kind, data, writer->pos + t, c); } } writer->pos += spec->n_prefix; } if (spec->n_spadding) { _PyUnicode_FastFill(writer->buffer, writer->pos, spec->n_spadding, fill_char); writer->pos += spec->n_spadding; } /* Only for type 'c' special case, it has no digits. */ if (spec->n_digits != 0) { /* Fill the digits with InsertThousandsGrouping. 
*/ char *pdigits; if (PyUnicode_READY(digits)) return -1; pdigits = PyUnicode_DATA(digits); if (PyUnicode_KIND(digits) < kind) { pdigits = _PyUnicode_AsKind(digits, kind); if (pdigits == NULL) return -1; } r = _PyUnicode_InsertThousandsGrouping( writer->buffer, writer->pos, spec->n_grouped_digits, pdigits + kind * d_pos, spec->n_digits, spec->n_min_width, locale->grouping, locale->thousands_sep, NULL); if (r == -1) return -1; assert(r == spec->n_grouped_digits); if (PyUnicode_KIND(digits) < kind) PyMem_Free(pdigits); d_pos += spec->n_digits; } if (toupper) { Py_ssize_t t; for (t = 0; t < spec->n_grouped_digits; t++) { Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t); c = Py_TOUPPER(c); if (c > 127) { PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit"); return -1; } PyUnicode_WRITE(kind, data, writer->pos + t, c); } } writer->pos += spec->n_grouped_digits; if (spec->n_decimal) { _PyUnicode_FastCopyCharacters( writer->buffer, writer->pos, locale->decimal_point, 0, spec->n_decimal); writer->pos += spec->n_decimal; d_pos += 1; } if (spec->n_remainder) { _PyUnicode_FastCopyCharacters( writer->buffer, writer->pos, digits, d_pos, spec->n_remainder); writer->pos += spec->n_remainder; /* d_pos += spec->n_remainder; */ } if (spec->n_rpadding) { _PyUnicode_FastFill(writer->buffer, writer->pos, spec->n_rpadding, fill_char); writer->pos += spec->n_rpadding; } return 0; }
/* Calculate new record length or append field to record.
 *
 * With copy_phase == 0 nothing is written: the function only counts how
 * long the record becomes and sets *quoted when the field needs quoting.
 * With copy_phase != 0 the characters are stored into self->rec, and the
 * opening quote is emitted only if *quoted is already set.
 *
 * quote_empty requests quoting of a field that contributed no characters;
 * this is an error when the dialect's quoting mode is QUOTE_NONE.
 *
 * Return the new record length, or -1 with an exception set.
 */
static Py_ssize_t
join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
                 Py_ssize_t field_len, int quote_empty, int *quoted,
                 int copy_phase)
{
    DialectObj *dialect = self->dialect;
    /* Same type as field_len so that very long fields cannot overflow the
       index. */
    Py_ssize_t i;
    Py_ssize_t rec_len;

    /* Bump the length; during the counting phase refuse to wrap around. */
#define INCLEN \
    do {\
        if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
            goto overflow; \
        } \
        rec_len++; \
    } while(0)

#define ADDCH(c) \
    do {\
        if (copy_phase) \
            self->rec[rec_len] = c;\
        INCLEN;\
    } while(0)

    rec_len = self->rec_len;

    /* If this is not the first field we need a field separator */
    if (self->num_fields > 0)
        ADDCH(dialect->delimiter);

    /* Handle preceding quote */
    if (copy_phase && *quoted)
        ADDCH(dialect->quotechar);

    /* Copy/count field data */
    /* If field is null just pass over */
    for (i = 0; field_data && (i < field_len); i++) {
        Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
        int want_escape = 0;

        if (c == dialect->delimiter ||
            c == dialect->escapechar ||
            c == dialect->quotechar  ||
            PyUnicode_FindChar(
                dialect->lineterminator, c, 0,
                PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
            if (dialect->quoting == QUOTE_NONE)
                want_escape = 1;
            else {
                if (c == dialect->quotechar) {
                    if (dialect->doublequote)
                        ADDCH(dialect->quotechar);
                    else
                        want_escape = 1;
                }
                if (!want_escape)
                    *quoted = 1;
            }
            if (want_escape) {
                if (!dialect->escapechar) {
                    PyErr_Format(error_obj,
                                 "need to escape, but no escapechar set");
                    return -1;
                }
                ADDCH(dialect->escapechar);
            }
        }
        /* Copy field character into record buffer. */
        ADDCH(c);
    }

    /* If field is empty check if it needs to be quoted. */
    if (i == 0 && quote_empty) {
        if (dialect->quoting == QUOTE_NONE) {
            PyErr_Format(error_obj,
                "single empty field record must be quoted");
            return -1;
        }
        else
            *quoted = 1;
    }

    if (*quoted) {
        if (copy_phase)
            ADDCH(dialect->quotechar);
        else {
            INCLEN; /* starting quote */
            INCLEN; /* ending quote */
        }
    }
    return rec_len;

  overflow:
    PyErr_NoMemory();
    return -1;
#undef ADDCH
#undef INCLEN
}
/* Write the characters of the unicode object `text` to file descriptor `fd`
 * using only ASCII output.
 *
 * Code points below 128 are written as-is; larger ones are written as
 * "\x", "\u" or "\U" followed by dump_hexadecimal() output of width 2, 4 or
 * 8.  At most MAX_STRING_LENGTH characters are written, followed by "..."
 * when the text was cut.  Returns silently when the object has no character
 * data available.
 */
static void
dump_ascii(int fd, PyObject *text)
{
    PyASCIIObject *ascii = (PyASCIIObject *)text;
    Py_ssize_t i, size;
    int truncated;
    int kind;
    void *data = NULL;
    wchar_t *wstr = NULL;
    Py_UCS4 ch;

    size = ascii->length;
    kind = ascii->state.kind;
    if (ascii->state.compact) {
        /* Compact objects store their characters right after the header. */
        if (ascii->state.ascii)
            data = ((PyASCIIObject*)text) + 1;
        else
            data = ((PyCompactUnicodeObject*)text) + 1;
    }
    else if (kind != PyUnicode_WCHAR_KIND) {
        data = ((PyUnicodeObject *)text)->data.any;
        if (data == NULL)
            return;
    }
    else {
        wstr = ((PyASCIIObject *)text)->wstr;
        if (wstr == NULL)
            return;
        size = ((PyCompactUnicodeObject *)text)->wstr_length;
    }

    if (MAX_STRING_LENGTH < size) {
        size = MAX_STRING_LENGTH;
        truncated = 1;
    }
    else
        truncated = 0;

    for (i=0; i < size; i++) {
        if (kind != PyUnicode_WCHAR_KIND)
            ch = PyUnicode_READ(kind, data, i);
        else
            ch = wstr[i];
        if (ch < 128) {
            char c = (char)ch;
            write(fd, &c, 1);
        }
        /* Inclusive upper bounds: U+00FF still fits in two hex digits and
           U+FFFF in four. */
        else if (ch <= 0xff) {
            PUTS(fd, "\\x");
            dump_hexadecimal(fd, ch, 2);
        }
        else if (ch <= 0xffff) {
            PUTS(fd, "\\u");
            dump_hexadecimal(fd, ch, 4);
        }
        else {
            PUTS(fd, "\\U");
            dump_hexadecimal(fd, ch, 8);
        }
    }
    if (truncated)
        PUTS(fd, "...");
}
/* Write line number `lineno` of the source file `filename` to the file
 * object `f`, preceded by `indent` spaces and followed by a newline.
 * Leading spaces, tabs and form feeds of the source line are removed.
 *
 * The file is opened in binary mode through the io module (falling back to
 * _Py_FindSourceFile()), its encoding is detected with
 * PyTokenizer_FindEncodingFilename() ("utf-8" when none is found), and it
 * is then read through io.TextIOWrapper.
 *
 * Returns 0 when there is nothing to display (no filename, the descriptor
 * or text wrapper cannot be obtained, or the line is not a str), -1 when
 * the io module or the source file cannot be opened or the line cannot be
 * read, and otherwise the result of the write calls.
 */
int
_Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent)
{
    int err = 0;
    int fd;
    int i;
    char *found_encoding;
    char *encoding;
    PyObject *io;
    PyObject *binary;
    PyObject *fob = NULL;
    PyObject *lineobj = NULL;
    PyObject *res;
    char buf[MAXPATHLEN+1];
    int kind;
    void *data;
    Py_ssize_t start, linelen;

    /* open the file */
    if (filename == NULL)
        return 0;

    io = PyImport_ImportModuleNoBlock("io");
    if (io == NULL)
        return -1;
    binary = _PyObject_CallMethodId(io, &PyId_open, "Os", filename, "rb");

    if (binary == NULL) {
        PyErr_Clear();

        binary = _Py_FindSourceFile(filename, buf, sizeof(buf), io);
        if (binary == NULL) {
            Py_DECREF(io);
            return -1;
        }
    }

    /* use the right encoding to decode the file as unicode */
    fd = PyObject_AsFileDescriptor(binary);
    if (fd < 0) {
        Py_DECREF(io);
        Py_DECREF(binary);
        return 0;
    }
    found_encoding = PyTokenizer_FindEncodingFilename(fd, filename);
    if (found_encoding == NULL)
        PyErr_Clear();
    encoding = (found_encoding != NULL) ? found_encoding : "utf-8";
    /* Reset position */
    if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
        Py_DECREF(io);
        Py_DECREF(binary);
        PyMem_FREE(found_encoding);
        return 0;
    }
    fob = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "Os", binary, encoding);
    Py_DECREF(io);
    Py_DECREF(binary);
    PyMem_FREE(found_encoding);

    if (fob == NULL) {
        PyErr_Clear();
        return 0;
    }

    /* get the line number lineno */
    for (i = 0; i < lineno; i++) {
        Py_XDECREF(lineobj);
        lineobj = PyFile_GetLine(fob, -1);
        if (!lineobj) {
            err = -1;
            break;
        }
    }
    res = _PyObject_CallMethodId(fob, &PyId_close, "");
    if (res)
        Py_DECREF(res);
    else
        PyErr_Clear();
    Py_DECREF(fob);
    if (!lineobj || !PyUnicode_Check(lineobj)) {
        Py_XDECREF(lineobj);
        return err;
    }

    /* remove the indentation of the line */
    kind = PyUnicode_KIND(lineobj);
    data = PyUnicode_DATA(lineobj);
    linelen = PyUnicode_GET_LENGTH(lineobj);
    /* Index with Py_ssize_t: the line length is not bounded by int. */
    for (start = 0; start < linelen; start++) {
        Py_UCS4 ch = PyUnicode_READ(kind, data, start);
        if (ch != ' ' && ch != '\t' && ch != '\014')
            break;
    }
    if (start) {
        PyObject *truncated;
        truncated = PyUnicode_Substring(lineobj, start, linelen);
        if (truncated) {
            Py_DECREF(lineobj);
            lineobj = truncated;
        } else {
            /* Keep the unstripped line if the substring cannot be built. */
            PyErr_Clear();
        }
    }

    /* Write some spaces before the line: buf holds exactly ten spaces and
       is emitted in chunks of up to ten columns. */
    strcpy(buf, "          ");
    assert (strlen(buf) == 10);
    while (indent > 0) {
        if (indent < 10)
            buf[indent] = '\0';
        err = PyFile_WriteString(buf, f);
        if (err != 0)
            break;
        indent -= 10;
    }

    /* finally display the line */
    if (err == 0)
        err = PyFile_WriteObject(lineobj, f, Py_PRINT_RAW);
    Py_DECREF(lineobj);
    if  (err == 0)
        err = PyFile_WriteString("\n", f);
    return err;
}
/* Write the unicode object `text` to file descriptor `fd` using printable
 * ASCII only.
 *
 * Characters in the range ' '..126 are written unchanged; every other code
 * point is written as "\x", "\u" or "\U" followed by _Py_DumpHexadecimal()
 * output of width 2, 4 or 8.  Output stops after MAX_STRING_LENGTH
 * characters and is then followed by "...".  Nothing is written when `text`
 * is not a str or has no character data available.
 */
void
_Py_DumpASCII(int fd, PyObject *text)
{
    PyASCIIObject *header = (PyASCIIObject *)text;
    Py_ssize_t idx, count;
    int was_cut = 0;
    int kind;
    void *chars = NULL;
    wchar_t *wide = NULL;
    Py_UCS4 ch;

    if (!PyUnicode_Check(text)) {
        return;
    }

    count = header->length;
    kind = header->state.kind;

    /* Locate the character storage for this representation. */
    if (kind == PyUnicode_WCHAR_KIND) {
        wide = ((PyASCIIObject *)text)->wstr;
        if (wide == NULL) {
            return;
        }
        count = ((PyCompactUnicodeObject *)text)->wstr_length;
    }
    else if (!header->state.compact) {
        chars = ((PyUnicodeObject *)text)->data.any;
        if (chars == NULL) {
            return;
        }
    }
    else if (header->state.ascii) {
        chars = ((PyASCIIObject*)text) + 1;
    }
    else {
        chars = ((PyCompactUnicodeObject*)text) + 1;
    }

    if (count > MAX_STRING_LENGTH) {
        count = MAX_STRING_LENGTH;
        was_cut = 1;
    }

    for (idx = 0; idx < count; idx++) {
        if (kind == PyUnicode_WCHAR_KIND) {
            ch = wide[idx];
        }
        else {
            ch = PyUnicode_READ(kind, chars, idx);
        }

        if (ch > 0xffff) {
            PUTS(fd, "\\U");
            _Py_DumpHexadecimal(fd, ch, 8);
        }
        else if (ch > 0xff) {
            PUTS(fd, "\\u");
            _Py_DumpHexadecimal(fd, ch, 4);
        }
        else if (ch < ' ' || ch > 126) {
            PUTS(fd, "\\x");
            _Py_DumpHexadecimal(fd, ch, 2);
        }
        else {
            /* printable ASCII character */
            char byte = (char)ch;
            _Py_write_noraise(fd, &byte, 1);
        }
    }

    if (was_cut) {
        PUTS(fd, "...");
    }
}
/* Calculate new record length or append field to record.
 *
 * With copy_phase == 0 nothing is written: the function only counts how
 * long the record becomes (failing with MemoryError if the count would
 * exceed PY_SSIZE_T_MAX) and sets *quoted when the field needs quoting.
 * With copy_phase != 0 the characters are stored into self->rec, and the
 * opening quote is emitted only if *quoted is already set.
 *
 * Return the new record length, or -1 with an exception set.
 */
static Py_ssize_t
join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
                 Py_ssize_t field_len, int *quoted,
                 int copy_phase)
{
    DialectObj *dialect = self->dialect;
    /* Same type as field_len so that very long fields cannot overflow the
       index. */
    Py_ssize_t i;
    Py_ssize_t rec_len;

#define INCLEN \
    do {\
        if (!copy_phase && rec_len == PY_SSIZE_T_MAX) {    \
            goto overflow; \
        } \
        rec_len++; \
    } while(0)

#define ADDCH(c)                                \
    do {\
        if (copy_phase) \
            self->rec[rec_len] = c;\
        INCLEN;\
    } while(0)

    rec_len = self->rec_len;

    /* If this is not the first field we need a field separator */
    if (self->num_fields > 0)
        ADDCH(dialect->delimiter);

    /* Handle preceding quote */
    if (copy_phase && *quoted)
        ADDCH(dialect->quotechar);

    /* Copy/count field data */
    /* If field is null just pass over */
    for (i = 0; field_data && (i < field_len); i++) {
        Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
        int want_escape = 0;

        if (c == dialect->delimiter ||
            c == dialect->escapechar ||
            c == dialect->quotechar  ||
            PyUnicode_FindChar(
                dialect->lineterminator, c, 0,
                PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
            if (dialect->quoting == QUOTE_NONE)
                want_escape = 1;
            else {
                if (c == dialect->quotechar) {
                    if (dialect->doublequote)
                        ADDCH(dialect->quotechar);
                    else
                        want_escape = 1;
                }
                if (!want_escape)
                    *quoted = 1;
            }
            if (want_escape) {
                if (!dialect->escapechar) {
                    PyErr_Format(_csvstate_global->error_obj,
                                 "need to escape, but no escapechar set");
                    return -1;
                }
                ADDCH(dialect->escapechar);
            }
        }
        /* Copy field character into record buffer. */
        ADDCH(c);
    }

    if (*quoted) {
        if (copy_phase)
            ADDCH(dialect->quotechar);
        else {
            INCLEN; /* starting quote */
            INCLEN; /* ending quote */
        }
    }
    return rec_len;

  overflow:
    PyErr_NoMemory();
    return -1;
#undef ADDCH
#undef INCLEN
}
/* Work-in-progress tokenizer entry point.
 *
 * Takes one str argument and scans it character by character, tracking a
 * current token and a backslash-escape flag, and prints diagnostics for the
 * cases that are not implemented yet.  After the scan it always raises
 * NotImplementedError("parse") and returns NULL.  Also returns NULL when the
 * argument is not a str or cannot be made ready.
 */
static PyObject *
parse(PyObject *self, PyObject *args, PyObject *kwargs)
{
    PyObject *string;
    if (!PyArg_ParseTuple(args, "U", &string))
        return NULL;
    if (PyUnicode_READY(string) == -1)
        return NULL;

    Py_ssize_t string_length = PyUnicode_GET_LENGTH(string);
    int string_kind = PyUnicode_KIND(string);
    void *string_data = PyUnicode_DATA(string);

    Py_UCS4 c;
    /* Same type as string_length so that long inputs cannot overflow it. */
    Py_ssize_t i;
    Symbol token = {.kind = UNDECIDED, .data=NULL};
    int escape = 0;
    for (i = 0; i < string_length; i++) {
        c = PyUnicode_READ(string_kind, string_data, i);
        switch (c) {
            case 0x22: // "
                if (escape) {
                } else {
                }
                break;
            case 0x27: // '
                if (escape) {
                } else {
                }
                break;
            case 0x30:
            case 0x31:
            case 0x32:
            case 0x33:
            case 0x34:
            case 0x35:
            case 0x36:
            case 0x37:
            case 0x38:
            case 0x39:
                switch (token.kind) {
                    case UNDECIDED:
                        token.kind = INTEGER;
                        token.data = c - 0x30;
                        break;
                    case INTEGER:
                        /* NOTE(review): digits are summed rather than
                           accumulated in base 10 -- confirm the intent. */
                        token.data += (c - 0x30);
                        break;
                }
                /* A digit must not fall through into the backslash case
                   below, which would toggle the escape flag. */
                break;
            case 0x5c: // backslash
                if (escape) {
                    printf("todo literal backslash\n");
                    escape = 0;
                } else {
                    escape = 1;
                }
                break;
            CASE__WHITE_SPACE:
                switch (token.kind) {
                    case INTEGER:
                    case IDENTIFIER:
                        printf("whitespace new token\n");
                        break;
                }
                break;
            CASE__XID_START:
                switch (token.kind) {
                    case UNDECIDED:
                        token.kind = IDENTIFIER;
                        break;
                    case INTEGER:
                        printf("todo\n");
                        break;
                }
                break;
            CASE__XID_CONTINUE__EXCLUDING__XID_START:
                switch (token.kind) {
                    case UNDECIDED:
                        printf("continue start?\n");
                        break;
                    case INTEGER:
                        printf("continue integer\n");
                        break;
                }
                break;
            default:
                printf("unclassified");
        }
    }

    PyErr_SetString(PyExc_NotImplementedError, "parse");
    return NULL;
}
/* Unicode version of xmlescape.
 *
 * Replaces '<', '>' and '&' by their entities, '"' by "&quot;" when doquot
 * is set, '\'' by "&#39;" when doapos is set, and the control characters
 * U+0000-U+0008, U+000B-U+001F (except U+000D) and U+007F-U+009F (except
 * U+0085) by decimal character references.
 *
 * Returns `str` itself (with a new reference) when nothing needs replacing,
 * otherwise a newly created string; NULL if the result cannot be allocated.
 */
static PyObject *xmlescape_str(PyObject *str, int doquot, int doapos)
{
    Py_ssize_t oldsize;
    void *olddata;
    Py_UCS4 maxchar = 127;
    Py_ssize_t i;
    Py_ssize_t newsize = 0;
    void *newdata;
    int kind = PyUnicode_KIND(str);

    oldsize = PyUnicode_GET_LENGTH(str);
    olddata = PyUnicode_DATA(str);

    /* First pass: compute the size of the result and the largest code
       point that is copied through unchanged. */
    for (i = 0; i < oldsize; ++i)
    {
        Py_UCS4 ch = PyUnicode_READ(kind, olddata, i);
        if (ch == ((Py_UCS4)'<'))
            newsize += 4; /* &lt; */
        else if (ch == (Py_UCS4)'>') /* Note that we always replace '>' with its entity, not just in case it is part of ']]>' */
            newsize += 4; /* &gt; */
        else if (ch == (Py_UCS4)'&')
            newsize += 5; /* &amp; */
        else if ((ch == (Py_UCS4)'"') && doquot)
            newsize += 6; /* &quot; */
        else if ((ch == (Py_UCS4)'\'') && doapos)
            newsize += 5; /* &#39; */
        else if (ch <= 0x8)
            newsize += 4; /* &#N; */
        else if ((ch >= 0xB) && (ch <= 0x1F) && (ch != 0xD))
            newsize += 5; /* &#NN; */
        else if ((ch >= 0x7F) && (ch <= 0x9F) && (ch != 0x85))
            newsize += 6; /* &#NNN; */
        else
        {
            newsize++;
            if (ch > maxchar)
                maxchar = ch;
        }
    }
    if (oldsize==newsize)
    {
        /* nothing to replace => return original */
        Py_INCREF(str);
        return str;
    }
    else
    {
        Py_ssize_t index = 0;
        int newkind;
        PyObject *result = PyUnicode_New(newsize, maxchar);

        /* Check the allocation before touching the result's storage. */
        if (result == NULL)
            return NULL;
        /* Write with the result's own kind rather than the source's. */
        newkind = PyUnicode_KIND(result);
        newdata = PyUnicode_DATA(result);

#define XMLESCAPE_PUT(value) \
    PyUnicode_WRITE(newkind, newdata, index++, (value))

        /* Second pass: emit the escaped text. */
        for (i = 0; i < oldsize; ++i)
        {
            Py_UCS4 ch = PyUnicode_READ(kind, olddata, i);
            if (ch == (Py_UCS4)'<')
            {
                XMLESCAPE_PUT('&');
                XMLESCAPE_PUT('l');
                XMLESCAPE_PUT('t');
                XMLESCAPE_PUT(';');
            }
            else if (ch == (Py_UCS4)'>')
            {
                XMLESCAPE_PUT('&');
                XMLESCAPE_PUT('g');
                XMLESCAPE_PUT('t');
                XMLESCAPE_PUT(';');
            }
            else if (ch == (Py_UCS4)'&')
            {
                XMLESCAPE_PUT('&');
                XMLESCAPE_PUT('a');
                XMLESCAPE_PUT('m');
                XMLESCAPE_PUT('p');
                XMLESCAPE_PUT(';');
            }
            else if ((ch == (Py_UCS4)'"') && doquot)
            {
                XMLESCAPE_PUT('&');
                XMLESCAPE_PUT('q');
                XMLESCAPE_PUT('u');
                XMLESCAPE_PUT('o');
                XMLESCAPE_PUT('t');
                XMLESCAPE_PUT(';');
            }
            else if ((ch == (Py_UCS4)'\'') && doapos)
            {
                XMLESCAPE_PUT('&');
                XMLESCAPE_PUT('#');
                XMLESCAPE_PUT('3');
                XMLESCAPE_PUT('9');
                XMLESCAPE_PUT(';');
            }
            else if (ch <= 0x8)
            {
                /* One decimal digit. */
                XMLESCAPE_PUT('&');
                XMLESCAPE_PUT('#');
                XMLESCAPE_PUT('0'+ch);
                XMLESCAPE_PUT(';');
            }
            else if ((ch >= 0xB) && (ch <= 0x1F) && (ch != 0xD))
            {
                /* Two decimal digits. */
                XMLESCAPE_PUT('&');
                XMLESCAPE_PUT('#');
                XMLESCAPE_PUT('0'+ch/10);
                XMLESCAPE_PUT('0'+ch%10);
                XMLESCAPE_PUT(';');
            }
            else if ((ch >= 0x7F) && (ch <= 0x9F) && (ch != 0x85))
            {
                /* Three decimal digits. */
                XMLESCAPE_PUT('&');
                XMLESCAPE_PUT('#');
                XMLESCAPE_PUT('0'+ch/100);
                XMLESCAPE_PUT('0'+(ch/10)%10);
                XMLESCAPE_PUT('0'+ch%10);
                XMLESCAPE_PUT(';');
            }
            else
                XMLESCAPE_PUT(ch);
        }
#undef XMLESCAPE_PUT
        return result;
    }
}