/* Given a (sub)module name, build the potential file path inside the
   archive (without extension).  Equivalent to:

       return self.prefix + name.replace('.', os.sep)

   Returns a new str object, or NULL with an exception set. */
static PyObject *
make_filename(PyObject *prefix, PyObject *name)
{
    const Py_ssize_t prefix_len = PyUnicode_GET_LENGTH(prefix);
    /* Room for prefix + name + the terminating NUL written after name. */
    const Py_ssize_t capacity = prefix_len + PyUnicode_GET_LENGTH(name) + 1;
    PyObject *result = NULL;
    Py_UCS4 *chars, *cursor;

    chars = PyMem_New(Py_UCS4, capacity);
    if (chars == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    /* Prefix goes in unterminated; the name follows it, NUL-terminated. */
    if (PyUnicode_AsUCS4(prefix, chars, capacity, 0) == NULL)
        goto finally;
    cursor = chars + prefix_len;
    if (PyUnicode_AsUCS4(name, cursor, capacity - prefix_len, 1) == NULL)
        goto finally;

    /* Only the name part is scanned: dots become path separators.  The
       scan stops at the first NUL code point. */
    while (*cursor != 0) {
        if (*cursor == '.')
            *cursor = SEP;
        cursor++;
    }
    result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
                                       chars, cursor - chars);

finally:
    PyMem_Free(chars);
    return result;
}
/* Implementation of zipimporter.get_data(path).

   If `path` starts with the archive path followed by SEP, that leading part
   is dropped; the remainder is looked up in self->files and the member's
   data is returned through get_data().

   Sets ValueError when the importer has no archive, and OSError (with the
   looked-up key as filename) when the member is not in self->files. */
static PyObject *
zipimport_zipimporter_get_data_impl(ZipImporter *self, PyObject *path)
/*[clinic end generated code: output=65dc506aaa268436 input=fa6428b74843c4ae]*/
{
    PyObject *member;
    PyObject *entry;
    Py_ssize_t archive_len, member_start;

    if (self->archive == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "zipimporter.__init__() wasn't called");
        return NULL;
    }

    /* Either way `path` is an owned reference from here on. */
#ifdef ALTSEP
    path = _PyObject_CallMethodId((PyObject *)&PyUnicode_Type, &PyId_replace,
                                  "OCC", path, ALTSEP, SEP);
    if (path == NULL)
        return NULL;
#else
    Py_INCREF(path);
#endif
    if (PyUnicode_READY(path) == -1)
        goto fail;

    /* Skip "<archive><SEP>" when the path begins with it. */
    archive_len = PyUnicode_GET_LENGTH(self->archive);
    member_start = 0;
    if (PyUnicode_Tailmatch(path, self->archive, 0, archive_len, -1)
        && PyUnicode_READ_CHAR(path, archive_len) == SEP) {
        member_start = archive_len + 1;
    }

    member = PyUnicode_Substring(path, member_start,
                                 PyUnicode_GET_LENGTH(path));
    if (member == NULL)
        goto fail;

    /* `entry` is borrowed from self->files. */
    entry = PyDict_GetItem(self->files, member);
    if (entry == NULL) {
        PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, member);
        Py_DECREF(member);
        goto fail;
    }
    Py_DECREF(member);
    Py_DECREF(path);
    return get_data(self->archive, entry);

fail:
    Py_DECREF(path);
    return NULL;
}
/* zipimporter.get_data(path): parse the single str argument, drop a leading
   "<archive><SEP>" if present, look the remainder up in self->files and
   return the member's data through get_data().

   Sets IOError (with the looked-up key as filename) when the member is not
   in self->files. */
static PyObject *
zipimporter_get_data(PyObject *obj, PyObject *args)
{
    ZipImporter *self = (ZipImporter *)obj;
    PyObject *path, *member;
#ifdef ALTSEP
    _Py_IDENTIFIER(replace);
#endif
    PyObject *entry;
    Py_ssize_t archive_len, member_start;

    if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &path))
        return NULL;

    /* Either way `path` is an owned reference from here on. */
#ifdef ALTSEP
    path = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
    if (path == NULL)
        return NULL;
#else
    Py_INCREF(path);
#endif
    if (PyUnicode_READY(path) == -1)
        goto fail;

    /* Skip "<archive><SEP>" when the path begins with it. */
    archive_len = PyUnicode_GET_LENGTH(self->archive);
    member_start = 0;
    if (PyUnicode_Tailmatch(path, self->archive, 0, archive_len, -1)
        && PyUnicode_READ_CHAR(path, archive_len) == SEP) {
        member_start = archive_len + 1;
    }

    member = PyUnicode_Substring(path, member_start,
                                 PyUnicode_GET_LENGTH(path));
    if (member == NULL)
        goto fail;

    /* `entry` is borrowed from self->files. */
    entry = PyDict_GetItem(self->files, member);
    if (entry == NULL) {
        PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, member);
        Py_DECREF(member);
        goto fail;
    }
    Py_DECREF(member);
    Py_DECREF(path);
    return get_data(self->archive, entry);

fail:
    Py_DECREF(path);
    return NULL;
}
/* Given a path to a .pyc file in the archive, return the modification
   time of the matching .py file, or 0 if no source is available.
   Returns (time_t)-1 if the shortened path cannot be built. */
static time_t
get_mtime_of_source(ZipImporter *self, PyObject *path)
{
    PyObject *source_path, *entry;
    int dos_time, dos_date;

    if (PyUnicode_READY(path) == -1)
        return (time_t)-1;

    /* Drop the last character: strips the 'c' or 'o' from *.py[co]. */
    source_path = PyUnicode_FromKindAndData(PyUnicode_KIND(path),
                                            PyUnicode_DATA(path),
                                            PyUnicode_GET_LENGTH(path) - 1);
    if (source_path == NULL)
        return (time_t)-1;

    /* Borrowed reference; the key is no longer needed after the lookup. */
    entry = PyDict_GetItem(self->files, source_path);
    Py_DECREF(source_path);

    if (entry == NULL || !PyTuple_Check(entry) || PyTuple_Size(entry) != 8)
        return 0;

    /* Items 5 and 6 of the entry carry the time stamp of the .py file,
       used for comparison with an embedded pyc time stamp. */
    dos_time = PyLong_AsLong(PyTuple_GetItem(entry, 5));
    dos_date = PyLong_AsLong(PyTuple_GetItem(entry, 6));
    return parse_dostime(dos_time, dos_date);
}
/* Buffer-protocol getbuffer hook: expose the exporter's character data as
   a read-only buffer whose byte length is the string's character count.
   Returns whatever PyBuffer_FillInfo() returns. */
static int
asciistr_getbuffer(PyObject *exporter, Py_buffer *view, int flags)
{
    void *chars = PyUnicode_DATA(exporter);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(exporter);

    /* The literal 1 marks the buffer read-only. */
    return PyBuffer_FillInfo(view, exporter, chars, nchars, 1, flags);
}
// Convert a Python Unicode object to a QString. QString qpycore_PyObject_AsQString(PyObject *obj) { #if defined(PYQT_PEP_393) SIP_SSIZE_T len = PyUnicode_GET_LENGTH(obj); switch (PyUnicode_KIND(obj)) { case PyUnicode_1BYTE_KIND: return QString::fromLatin1((char *)PyUnicode_1BYTE_DATA(obj), len); case PyUnicode_2BYTE_KIND: // The (QChar *) cast should be safe. return QString((QChar *)PyUnicode_2BYTE_DATA(obj), len); case PyUnicode_4BYTE_KIND: return QString::fromUcs4(PyUnicode_4BYTE_DATA(obj), len); } return QString(); #elif defined(Py_UNICODE_WIDE) return QString::fromUcs4((const uint *)PyUnicode_AS_UNICODE(obj), PyUnicode_GET_SIZE(obj)); #else return QString::fromUtf16((const ushort *)PyUnicode_AS_UNICODE(obj), PyUnicode_GET_SIZE(obj)); #endif }
/* tp_iternext for the reader: pull lines from the underlying iterator and
   feed them to the parser one character at a time until a full record has
   been assembled.  Returns the record's field list (ownership passes to the
   caller), or NULL at end of input or on error. */
static PyObject *
Reader_iternext(ReaderObj *self)
{
    PyObject *record;

    if (parse_reset(self) < 0)
        return NULL;

    do {
        PyObject *line;
        unsigned int kind;
        void *data;
        Py_ssize_t idx, nchars;

        line = PyIter_Next(self->input_iter);
        if (line == NULL) {
            /* End of input OR exception */
            if (!PyErr_Occurred() && self->field_len != 0)
                PyErr_Format(error_obj,
                             "newline inside string");
            return NULL;
        }
        if (!PyUnicode_Check(line)) {
            PyErr_Format(error_obj,
                         "iterator should return strings, "
                         "not %.200s "
                         "(did you open the file in text mode?)",
                         line->ob_type->tp_name
                );
            Py_DECREF(line);
            return NULL;
        }
        ++self->line_num;

        kind = PyUnicode_KIND(line);
        data = PyUnicode_DATA(line);
        nchars = PyUnicode_GET_LENGTH(line);
        for (idx = 0; idx < nchars; idx++) {
            Py_UCS4 ch = PyUnicode_READ(kind, data, idx);

            if (ch == '\0') {
                Py_DECREF(line);
                PyErr_Format(error_obj,
                             "line contains NULL byte");
                return NULL;
            }
            if (parse_process_char(self, ch) < 0) {
                Py_DECREF(line);
                return NULL;
            }
        }
        Py_DECREF(line);

        /* A 0 character is fed to the parser after every line. */
        if (parse_process_char(self, 0) < 0)
            return NULL;
    } while (self->state != START_RECORD);

    /* Hand the accumulated field list over to the caller. */
    record = self->fields;
    self->fields = NULL;
    return record;
}
/* Append one field to the writer's record buffer.  `field` may be NULL, in
   which case no character data is passed on.  Returns 1 on success and 0
   on failure. */
static int
join_append(WriterObj *self, PyObject *field, int *quoted, int quote_empty)
{
    unsigned int kind = -1;
    void *data = NULL;
    Py_ssize_t nchars = 0;
    Py_ssize_t needed;

    if (field != NULL) {
        kind = PyUnicode_KIND(field);
        data = PyUnicode_DATA(field);
        nchars = PyUnicode_GET_LENGTH(field);
    }

    /* First pass (copy_phase == 0) only computes the new record length. */
    needed = join_append_data(self, kind, data, nchars,
                              quote_empty, quoted, 0);

    /* Bail out on error, or if the record buffer cannot be grown. */
    if (needed < 0 || !join_check_rec_size(self, needed))
        return 0;

    /* Second pass (copy_phase == 1) writes the characters. */
    self->rec_len = join_append_data(self, kind, data, nchars,
                                     quote_empty, quoted, 1);
    self->num_fields++;

    return 1;
}
// Convert a Python Unicode object to a QString. QString qpycore_PyObject_AsQString(PyObject *obj) { #if defined(PYQT_PEP_393) SIP_SSIZE_T len = PyUnicode_GET_LENGTH(obj); switch (PyUnicode_KIND(obj)) { case PyUnicode_1BYTE_KIND: return QString::fromLatin1((char *)PyUnicode_1BYTE_DATA(obj), len); case PyUnicode_2BYTE_KIND: // The (QChar *) cast should be safe. return QString((QChar *)PyUnicode_2BYTE_DATA(obj), len); case PyUnicode_4BYTE_KIND: #if QT_VERSION >= 0x040200 return QString::fromUcs4(PyUnicode_4BYTE_DATA(obj), len); #else // Note that this code doesn't handle code points greater than 0xffff // very well. QString qstr; Py_UCS4 *ucode = PyUnicode_4BYTE_DATA(obj); for (SIP_SSIZE_T i = 0; i < len; ++i) qstr.append((uint)ucode[i]); return qstr; #endif } return QString(); #elif defined(Py_UNICODE_WIDE) #if QT_VERSION >= 0x040200 return QString::fromUcs4((const uint *)PyUnicode_AS_UNICODE(obj), PyUnicode_GET_SIZE(obj)); #else // Note that this code doesn't handle code points greater than 0xffff very // well. QString qstr; Py_UNICODE *ucode = PyUnicode_AS_UNICODE(obj); SIP_SSIZE_T len = PyUnicode_GET_SIZE(obj); for (SIP_SSIZE_T i = 0; i < len; ++i) qstr.append((uint)ucode[i]); return qstr; #endif #else return QString::fromUtf16((const ushort *)PyUnicode_AS_UNICODE(obj), PyUnicode_GET_SIZE(obj)); #endif }
/* complex.__format__(format_spec): parse the single str argument and format
   `self` using the whole spec (character range 0 .. len(format_spec)). */
static PyObject *
complex__format__(PyObject* self, PyObject* args)
{
    PyObject *spec;
    Py_ssize_t spec_len;

    if (!PyArg_ParseTuple(args, "U:__format__", &spec))
        return NULL;

    spec_len = PyUnicode_GET_LENGTH(spec);
    return _PyComplex_FormatAdvanced(self, spec, 0, spec_len);
}
/* repr() for zipimporter objects: shows the archive path, followed by SEP
   and the prefix when a non-empty prefix is set, or "???" when the importer
   has no archive. */
static PyObject *
zipimporter_repr(ZipImporter *self)
{
    int has_prefix;

    if (self->archive == NULL)
        return PyUnicode_FromString("<zipimporter object \"???\">");

    has_prefix = (self->prefix != NULL
                  && PyUnicode_GET_LENGTH(self->prefix) != 0);
    if (!has_prefix)
        return PyUnicode_FromFormat("<zipimporter object \"%U\">",
                                    self->archive);

    return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">",
                                self->archive, SEP, self->prefix);
}
/* Fill `data` with what is needed to recreate the str `obj` elsewhere: a
   heap-allocated _shared_str_data recording the string's kind, a pointer to
   its character buffer and a length.  The buffer itself is not copied; it
   still belongs to `obj`.

   Returns 0 on success.  On allocation failure sets MemoryError and returns
   -1, leaving `data` untouched. */
static int
_str_shared(PyObject *obj, _PyCrossInterpreterData *data)
{
    struct _shared_str_data *shared = PyMem_NEW(struct _shared_str_data, 1);
    if (shared == NULL) {
        /* PyMem_NEW does not set an exception by itself. */
        PyErr_NoMemory();
        return -1;
    }
    shared->kind = PyUnicode_KIND(obj);
    shared->buffer = PyUnicode_DATA(obj);
    /* NOTE(review): this records one character less than the string holds.
       Unless _new_str_object compensates, the recreated string loses its
       last character -- confirm against _new_str_object. */
    shared->len = PyUnicode_GET_LENGTH(obj) - 1;
    data->data = (void *)shared;
    data->obj = obj;  // Will be "released" (decref'ed) when data released.
    data->new_object = _new_str_object;
    data->free = PyMem_Free;
    return 0;
}
/* return fullname.split(".")[-1]

   Returns a new reference: `fullname` itself when it contains no dot,
   otherwise the part after the last dot.  NULL on error. */
static PyObject *
get_subname(PyObject *fullname)
{
    Py_ssize_t length, last_dot;

    if (PyUnicode_READY(fullname) < 0)
        return NULL;

    length = PyUnicode_GET_LENGTH(fullname);
    /* Direction -1: search backwards, i.e. find the last dot. */
    last_dot = PyUnicode_FindChar(fullname, '.', 0, length, -1);
    if (last_dot != -1)
        return PyUnicode_Substring(fullname, last_dot + 1, length);

    Py_INCREF(fullname);
    return fullname;
}
static PyObject * syslog_get_argv(void) { /* Figure out what to use for as the program "ident" for openlog(). * This swallows exceptions and continues rather than failing out, * because the syslog module can still be used because openlog(3) * is optional. */ Py_ssize_t argv_len, scriptlen; PyObject *scriptobj; Py_ssize_t slash; PyObject *argv = PySys_GetObject("argv"); if (argv == NULL) { return(NULL); } argv_len = PyList_Size(argv); if (argv_len == -1) { PyErr_Clear(); return(NULL); } if (argv_len == 0) { return(NULL); } scriptobj = PyList_GetItem(argv, 0); if (!PyUnicode_Check(scriptobj)) { return(NULL); } scriptlen = PyUnicode_GET_LENGTH(scriptobj); if (scriptlen == 0) { return(NULL); } slash = PyUnicode_FindChar(scriptobj, SEP, 0, scriptlen, -1); if (slash == -2) return NULL; if (slash != -1) { return PyUnicode_Substring(scriptobj, slash, scriptlen); } else { Py_INCREF(scriptobj); return(scriptobj); } return(NULL); }
/* all_name_chars(s): true iff s matches [a-zA-Z0-9_]*
   Non-ASCII strings never match; the empty string does. */
static int
all_name_chars(PyObject *o)
{
    const unsigned char *chars;
    Py_ssize_t i, nchars;

    if (!PyUnicode_IS_ASCII(o))
        return 0;

    chars = PyUnicode_1BYTE_DATA(o);
    nchars = PyUnicode_GET_LENGTH(o);
    for (i = 0; i < nchars; i++) {
        const unsigned char ch = chars[i];

        if (ch != '_' && !Py_ISALNUM(ch))
            return 0;
    }
    return 1;
}
/* Convert a Python object to a single Py_UCS4 code point.

   Accepts a one-character unicode string (or, on builds where Py_UNICODE is
   two bytes wide, a surrogate pair) or an integer in 0..1114111.
   On failure returns (Py_UCS4)-1 with an exception set. */
static NUMBA_INLINE Py_UCS4 __Numba_PyObject_AsPy_UCS4(PyObject* x) {
    Py_ssize_t length;

    if (!PyUnicode_Check(x)) {
        /* Integer path: range-check the value. */
        long ival = __Numba_PyInt_AsLong(x);

        if (unlikely(ival < 0)) {
            /* A negative result may also be the converter's own error
               indicator, in which case its exception is kept. */
            if (!PyErr_Occurred())
                PyErr_SetString(PyExc_OverflowError,
                                "cannot convert negative value to Py_UCS4");
            return (Py_UCS4)-1;
        }
        if (unlikely(ival > 1114111)) {
            PyErr_SetString(PyExc_OverflowError,
                            "value too large to convert to Py_UCS4");
            return (Py_UCS4)-1;
        }
        return (Py_UCS4)ival;
    }

#if CYTHON_PEP393_ENABLED
    length = PyUnicode_GET_LENGTH(x);
    if (likely(length == 1))
        return PyUnicode_READ_CHAR(x, 0);
#else
    length = PyUnicode_GET_SIZE(x);
    if (likely(length == 1))
        return PyUnicode_AS_UNICODE(x)[0];
#if Py_UNICODE_SIZE == 2
    if (length == 2) {
        /* Two code units: accept them only as a high + low surrogate pair. */
        Py_UCS4 high_val = PyUnicode_AS_UNICODE(x)[0];

        if (high_val >= 0xD800 && high_val <= 0xDBFF) {
            Py_UCS4 low_val = PyUnicode_AS_UNICODE(x)[1];

            if (low_val >= 0xDC00 && low_val <= 0xDFFF) {
                return 0x10000 + (((high_val & ((1<<10)-1)) << 10) | (low_val & ((1<<10)-1)));
            }
        }
    }
#endif
#endif
    PyErr_Format(PyExc_ValueError,
                 "only single character unicode strings can be converted to Py_UCS4, "
                 "got length %" CYTHON_FORMAT_SSIZE_T "d", length);
    return (Py_UCS4)-1;
}
/* complex.__format__(format_spec): format `self` according to the whole
   spec into a _PyUnicodeWriter and return the finished string, or NULL with
   an exception set. */
static PyObject *
complex__format__(PyObject* self, PyObject* args)
{
    _PyUnicodeWriter writer;
    PyObject *spec;

    if (!PyArg_ParseTuple(args, "U:__format__", &spec))
        return NULL;

    _PyUnicodeWriter_Init(&writer);
    if (_PyComplex_FormatAdvancedWriter(&writer, self, spec, 0,
                                        PyUnicode_GET_LENGTH(spec)) == -1) {
        /* Release whatever the writer accumulated before the failure. */
        _PyUnicodeWriter_Dealloc(&writer);
        return NULL;
    }
    return _PyUnicodeWriter_Finish(&writer);
}
/* Append the dialect's line terminator to the record buffer, growing the
   buffer first if needed.  Returns 1 on success, 0 on failure. */
static int
join_append_lineterminator(WriterObj *self)
{
    PyObject *terminator;
    unsigned int kind;
    void *data;
    Py_ssize_t nchars, offset;

    nchars = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
    if (nchars == -1)
        return 0;

    /* grow record buffer if necessary */
    if (!join_check_rec_size(self, self->rec_len + nchars))
        return 0;

    terminator = self->dialect->lineterminator;
    kind = PyUnicode_KIND(terminator);
    data = PyUnicode_DATA(terminator);

    /* Copy the terminator character by character behind the record. */
    for (offset = 0; offset < nchars; offset++)
        self->rec[self->rec_len + offset] = PyUnicode_READ(kind, data, offset);
    self->rec_len += nchars;

    return 1;
}
/* Translate a byte offset into `text` to a character count: decode the
   first `index` bytes of `text` (clamped to its length) with
   PyUnicode_DECODE and return the length of the decoded string.
   Returns -1 if memory allocation or decoding fails. */
Py_ssize_t
PyUnicode_INDEX(const char *text, Py_ssize_t index)
{
    /* Prevent excessive micro allocations */
    char stack_buf[256];
    const size_t stack_cap = Py_ARRAY_LENGTH(stack_buf);
    char *prefix = stack_buf;
    PyObject *decoded;
    Py_ssize_t nchars;
    size_t text_len;

    /* Short-circuit */
    if (index == 0)
        return 0;

    text_len = strlen(text);
    if (index > text_len)
        index = text_len;

    /* Prefixes that do not fit the stack buffer go on the heap. */
    if (index >= stack_cap) {
        prefix = PyMem_RawMalloc(index+1);
        if (prefix == NULL)
            return -1;
    }

    /* index <= strlen(text) here, so exactly `index` bytes are copied. */
    memcpy(prefix, text, index);
    prefix[index] = '\0';

    decoded = PyUnicode_DECODE(prefix);
    if (prefix != stack_buf)
        PyMem_RawFree(prefix);
    if (decoded == NULL)
        return -1;

    nchars = PyUnicode_GET_LENGTH(decoded);
    Py_DECREF(decoded);
    return nchars;
}
/* repr() for namespace objects: "<name>(key=value, ...)" with the keys in
   sorted order.  <name> is "namespace" for the exact type and the type's
   tp_name otherwise.  Keys that are not non-empty strings are left out.
   A recursive repr yields "<name>(...)".  Returns NULL on error. */
static PyObject *
namespace_repr(PyObject *ns)
{
    PyObject *pairs = NULL, *dict = NULL, *keys = NULL, *keys_iter = NULL;
    PyObject *key, *sep, *joined;
    PyObject *result = NULL;
    const char *name;
    int status;

    if (Py_TYPE(ns) == &_PyNamespace_Type)
        name = "namespace";
    else
        name = ns->ob_type->tp_name;

    status = Py_ReprEnter(ns);
    if (status > 0)
        return PyUnicode_FromFormat("%s(...)", name);
    if (status != 0)
        return NULL;

    pairs = PyList_New(0);
    if (pairs == NULL)
        goto finally;

    /* Hold our own reference to the dict for the duration of the repr. */
    dict = ((_PyNamespaceObject *)ns)->ns_dict;
    assert(dict != NULL);
    Py_INCREF(dict);

    keys = PyDict_Keys(dict);
    if (keys == NULL)
        goto finally;
    if (PyList_Sort(keys) != 0)
        goto finally;

    keys_iter = PyObject_GetIter(keys);
    if (keys_iter == NULL)
        goto finally;

    for (;;) {
        int failed = 0;

        key = PyIter_Next(keys_iter);
        if (key == NULL)
            break;

        if (PyUnicode_Check(key) && PyUnicode_GET_LENGTH(key) > 0) {
            PyObject *value, *item;

            /* Borrowed reference. */
            value = PyDict_GetItem(dict, key);
            assert(value != NULL);

            item = PyUnicode_FromFormat("%S=%R", key, value);
            if (item == NULL) {
                failed = 1;
            }
            else {
                failed = PyList_Append(pairs, item);
                Py_DECREF(item);
            }
        }

        Py_DECREF(key);
        if (failed)
            goto finally;
    }

    sep = PyUnicode_FromString(", ");
    if (sep == NULL)
        goto finally;

    joined = PyUnicode_Join(sep, pairs);
    Py_DECREF(sep);
    if (joined == NULL)
        goto finally;

    result = PyUnicode_FromFormat("%s(%S)", name, joined);
    Py_DECREF(joined);

finally:
    Py_XDECREF(pairs);
    Py_XDECREF(dict);
    Py_XDECREF(keys);
    Py_XDECREF(keys_iter);
    /* Pairs with the successful Py_ReprEnter() above. */
    Py_ReprLeave(ns);

    return result;
}
/* Work out the context a warning is attributed to: walk up the frame stack
   according to stack_level and fill in *lineno, *registry, *module and
   *filename from that frame's globals.

   Returns 0 on error (no new refs), 1 on success.  On success *filename,
   *module and *registry each hold a reference owned by the caller. */
static int
setup_context(Py_ssize_t stack_level, PyObject **filename, int *lineno,
              PyObject **module, PyObject **registry)
{
    PyObject *globals;

    /* Setup globals and lineno. */
    PyFrameObject *f = PyThreadState_GET()->frame;
    // Stack level comparisons to Python code is off by one as there is no
    // warnings-related stack level to avoid.
    if (stack_level <= 0 || is_internal_frame(f)) {
        /* Plain walk: follow f_back for each remaining level. */
        while (--stack_level > 0 && f != NULL) {
            f = f->f_back;
        }
    }
    else {
        /* Same walk, but stepping with next_external_frame(). */
        while (--stack_level > 0 && f != NULL) {
            f = next_external_frame(f);
        }
    }

    if (f == NULL) {
        /* Ran out of frames: fall back to the interpreter's sysdict and
           line 1. */
        globals = PyThreadState_Get()->interp->sysdict;
        *lineno = 1;
    }
    else {
        globals = f->f_globals;
        *lineno = PyFrame_GetLineNumber(f);
    }

    *module = NULL;

    /* Setup registry. */
    assert(globals != NULL);
    assert(PyDict_Check(globals));
    *registry = PyDict_GetItemString(globals, "__warningregistry__");
    if (*registry == NULL) {
        int rc;

        /* No registry yet: create one and store it in the globals. */
        *registry = PyDict_New();
        if (*registry == NULL)
            return 0;

        rc = PyDict_SetItemString(globals, "__warningregistry__", *registry);
        if (rc < 0)
            goto handle_error;
    }
    else
        /* The lookup returned a borrowed reference; take our own. */
        Py_INCREF(*registry);

    /* Setup module. */
    *module = PyDict_GetItemString(globals, "__name__");
    if (*module == NULL) {
        *module = PyUnicode_FromString("<string>");
        if (*module == NULL)
            goto handle_error;
    }
    else
        Py_INCREF(*module);

    /* Setup filename. */
    *filename = PyDict_GetItemString(globals, "__file__");
    if (*filename != NULL && PyUnicode_Check(*filename)) {
        Py_ssize_t len;
        int kind;
        void *data;

        /* *filename is still a borrowed reference here, so the error path
           has nothing to release for it. */
        if (PyUnicode_READY(*filename))
            goto handle_error;

        len = PyUnicode_GetLength(*filename);
        kind = PyUnicode_KIND(*filename);
        data = PyUnicode_DATA(*filename);

/* Lower-cases ASCII only; any other code point maps to 0 and therefore
   never compares equal to a letter. */
#define ascii_lower(c) ((c <= 127) ? Py_TOLOWER(c) : 0)
        /* if filename.lower().endswith(".pyc"): */
        if (len >= 4 &&
            PyUnicode_READ(kind, data, len-4) == '.' &&
            ascii_lower(PyUnicode_READ(kind, data, len-3)) == 'p' &&
            ascii_lower(PyUnicode_READ(kind, data, len-2)) == 'y' &&
            ascii_lower(PyUnicode_READ(kind, data, len-1)) == 'c')
        {
            /* Drop the final character, turning ".pyc" into ".py"; the
               substring is a new reference. */
            *filename = PyUnicode_Substring(*filename, 0,
                                            PyUnicode_GET_LENGTH(*filename)-1);
            if (*filename == NULL)
                goto handle_error;
        }
        else
            Py_INCREF(*filename);
    }
    else {
        /* No usable __file__ in the globals. */
        *filename = NULL;
        if (*module != Py_None && PyUnicode_CompareWithASCIIString(*module, "__main__") == 0) {
            PyObject *argv = _PySys_GetObjectId(&PyId_argv);
            /* PyList_Check() is needed because sys.argv is set to None during
               Python finalization */
            if (argv != NULL && PyList_Check(argv) && PyList_Size(argv) > 0) {
                int is_true;
                *filename = PyList_GetItem(argv, 0);
                Py_INCREF(*filename);
                /* If sys.argv[0] is false, then use '__main__'. */
                is_true = PyObject_IsTrue(*filename);
                if (is_true < 0) {
                    Py_DECREF(*filename);
                    goto handle_error;
                }
                else if (!is_true) {
                    Py_XSETREF(*filename, PyUnicode_FromString("__main__"));
                    if (*filename == NULL)
                        goto handle_error;
                }
            }
            else {
                /* embedded interpreters don't have sys.argv, see bug #839151 */
                *filename = PyUnicode_FromString("__main__");
                if (*filename == NULL)
                    goto handle_error;
            }
        }
        if (*filename == NULL) {
            /* Last resort: use the module name as the filename. */
            *filename = *module;
            Py_INCREF(*filename);
        }
    }

    return 1;

 handle_error:
    /* filename not XDECREF'ed here as there is no way to jump here with a
       dangling reference. */
    Py_XDECREF(*registry);
    Py_XDECREF(*module);
    return 0;
}
/* Write a warning to sys.stderr as

       filename:lineno: category: text
         source_line

   When `sourceline` is given, it is printed with its leading whitespace
   stripped; otherwise the line is looked up with _Py_DisplaySourceLine().
   Best effort: any exception raised along the way is cleared before
   returning. */
static void
show_warning(PyObject *filename, int lineno, PyObject *text,
             PyObject *category, PyObject *sourceline)
{
    PyObject *f_stderr;
    PyObject *name;
    char lineno_str[128];
    _Py_IDENTIFIER(__name__);

    PyOS_snprintf(lineno_str, sizeof(lineno_str), ":%d: ", lineno);

    name = _PyObject_GetAttrId(category, &PyId___name__);
    if (name == NULL)  /* XXX Can an object lack a '__name__' attribute? */
        goto error;

    f_stderr = _PySys_GetObjectId(&PyId_stderr);
    if (f_stderr == NULL) {
        fprintf(stderr, "lost sys.stderr\n");
        goto error;
    }

    /* Print "filename:lineno: category: text\n" */
    if (PyFile_WriteObject(filename, f_stderr, Py_PRINT_RAW) < 0)
        goto error;
    if (PyFile_WriteString(lineno_str, f_stderr) < 0)
        goto error;
    if (PyFile_WriteObject(name, f_stderr, Py_PRINT_RAW) < 0)
        goto error;
    if (PyFile_WriteString(": ", f_stderr) < 0)
        goto error;
    if (PyFile_WriteObject(text, f_stderr, Py_PRINT_RAW) < 0)
        goto error;
    if (PyFile_WriteString("\n", f_stderr) < 0)
        goto error;
    Py_CLEAR(name);

    /* Print "  source_line\n" */
    if (sourceline) {
        int kind;
        void *data;
        Py_ssize_t i, len;
        Py_UCS4 ch;
        PyObject *truncated;

        /* PyUnicode_READY() returns 0 on success and -1 on failure, so only
           a negative result is an error. */
        if (PyUnicode_READY(sourceline) < 0)
            goto error;

        kind = PyUnicode_KIND(sourceline);
        data = PyUnicode_DATA(sourceline);
        len = PyUnicode_GET_LENGTH(sourceline);
        /* Find the first character that is not a space, tab or form feed. */
        for (i=0; i<len; i++) {
            ch = PyUnicode_READ(kind, data, i);
            if (ch != ' ' && ch != '\t' && ch != '\014')
                break;
        }

        truncated = PyUnicode_Substring(sourceline, i, len);
        if (truncated == NULL)
            goto error;

        /* Write the stripped line behind a two-space indent; the fallback
           below passes 2 to _Py_DisplaySourceLine(), presumably the same
           indent. */
        PyFile_WriteString("  ", f_stderr);
        PyFile_WriteObject(truncated, f_stderr, Py_PRINT_RAW);
        Py_DECREF(truncated);
        PyFile_WriteString("\n", f_stderr);
    }
    else {
        _Py_DisplaySourceLine(f_stderr, filename, lineno, 2);
    }

error:
    Py_XDECREF(name);
    PyErr_Clear();
}
/* Format the integer `value` according to `format` and append the result to
   `writer`.

   Type 'c' formats the value as a single character; 'b', 'o', 'x' and 'X'
   select base 2, 8, 16 and 16; every other type uses base 10.  A precision
   is rejected, as is a sign combined with 'c'.

   Returns -1 with an exception set on error; otherwise the value returned
   by the fast path or by fill_number(). */
static int
format_long_internal(PyObject *value, const InternalFormatSpec *format,
                     _PyUnicodeWriter *writer)
{
    int result = -1;
    Py_UCS4 maxchar = 127;
    PyObject *tmp = NULL;
    Py_ssize_t inumeric_chars;
    Py_UCS4 sign_char = '\0';
    Py_ssize_t n_digits;       /* count of digits need from the computed
                                  string */
    Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
                                   produces non-digits */
    Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */
    Py_ssize_t n_total;
    Py_ssize_t prefix = 0;
    NumberFieldWidths spec;
    long x;

    /* Locale settings, either from the actual locale or
       from a hard-code pseudo-locale */
    LocaleInfo locale = STATIC_LOCALE_INFO_INIT;

    /* no precision allowed on integers */
    if (format->precision != -1) {
        PyErr_SetString(PyExc_ValueError,
                        "Precision not allowed in integer format specifier");
        goto done;
    }

    /* special case for character formatting */
    if (format->type == 'c') {
        /* error to specify a sign */
        if (format->sign != '\0') {
            PyErr_SetString(PyExc_ValueError,
                            "Sign not allowed with integer"
                            " format specifier 'c'");
            goto done;
        }

        /* taken from unicodeobject.c formatchar() */
        /* Integer input truncated to a character */
        x = PyLong_AsLong(value);
        if (x == -1 && PyErr_Occurred())
            goto done;
        if (x < 0 || x > 0x10ffff) {
            PyErr_SetString(PyExc_OverflowError,
                            "%c arg not in range(0x110000)");
            goto done;
        }
        tmp = PyUnicode_FromOrdinal(x);
        /* Creating the one-character string can fail; tmp is passed on to
           calc_number_widths() and fill_number() below, so stop here rather
           than hand them NULL. */
        if (tmp == NULL)
            goto done;
        inumeric_chars = 0;
        n_digits = 1;
        maxchar = Py_MAX(maxchar, (Py_UCS4)x);

        /* As a sort-of hack, we tell calc_number_widths that we only
           have "remainder" characters. calc_number_widths thinks
           these are characters that don't get formatted, only copied
           into the output string. We do this for 'c' formatting,
           because the characters are likely to be non-digits. */
        n_remainder = 1;
    }
    else {
        int base;
        int leading_chars_to_skip = 0;  /* Number of characters added by
                                           PyNumber_ToBase that we want to
                                           skip over. */

        /* Compute the base and how many characters will be added by
           PyNumber_ToBase */
        switch (format->type) {
        case 'b':
            base = 2;
            leading_chars_to_skip = 2; /* 0b */
            break;
        case 'o':
            base = 8;
            leading_chars_to_skip = 2; /* 0o */
            break;
        case 'x':
        case 'X':
            base = 16;
            leading_chars_to_skip = 2; /* 0x */
            break;
        default:  /* shouldn't be needed, but stops a compiler warning */
        case 'd':
        case 'n':
            base = 10;
            break;
        }

        if (format->sign != '+' && format->sign != ' '
            && format->width == -1
            && format->type != 'X' && format->type != 'n'
            && !format->thousands_separators
            && PyLong_CheckExact(value))
        {
            /* Fast path */
            return _PyLong_FormatWriter(writer, value, base, format->alternate);
        }

        /* The number of prefix chars is the same as the leading
           chars to skip */
        if (format->alternate)
            n_prefix = leading_chars_to_skip;

        /* Do the hard part, converting to a string in a given base */
        tmp = _PyLong_Format(value, base);
        if (tmp == NULL || PyUnicode_READY(tmp) == -1)
            goto done;

        inumeric_chars = 0;
        n_digits = PyUnicode_GET_LENGTH(tmp);

        prefix = inumeric_chars;

        /* Is a sign character present in the output?  If so, remember it
           and skip it */
        if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
            sign_char = '-';
            ++prefix;
            ++leading_chars_to_skip;
        }

        /* Skip over the leading chars (0x, 0b, etc.) */
        n_digits -= leading_chars_to_skip;
        inumeric_chars += leading_chars_to_skip;
    }

    /* Determine the grouping, separator, and decimal point, if any. */
    if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
                        (format->thousands_separators ?
                         LT_DEFAULT_LOCALE :
                         LT_NO_LOCALE),
                        &locale) == -1)
        goto done;

    /* Calculate how much memory we'll need. */
    n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
                                 inumeric_chars + n_digits, n_remainder, 0,
                                 &locale, format, &maxchar);

    /* Allocate the memory. */
    if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
        goto done;

    /* Populate the memory. */
    result = fill_number(writer, &spec,
                         tmp, inumeric_chars, inumeric_chars + n_digits,
                         tmp, prefix, format->fill_char,
                         &locale, format->type == 'X');

done:
    Py_XDECREF(tmp);
    free_locale_info(&locale);
    return result;
}
/* Format the string `value` according to `format` and append the result to
   `writer`.  Sign, alternate form (#) and '=' alignment are rejected with
   ValueError.  Returns -1 on error; otherwise 0, or the result of
   _PyUnicodeWriter_WriteStr() on the fast path. */
static int
format_string_internal(PyObject *value, const InternalFormatSpec *format,
                       _PyUnicodeWriter *writer)
{
    Py_ssize_t pad_left, pad_right, out_width;
    Py_ssize_t nchars;
    Py_UCS4 maxchar;

    assert(PyUnicode_IS_READY(value));
    nchars = PyUnicode_GET_LENGTH(value);

    /* sign is not allowed on strings */
    if (format->sign != '\0') {
        PyErr_SetString(PyExc_ValueError,
                        "Sign not allowed in string format specifier");
        return -1;
    }

    /* alternate is not allowed on strings */
    if (format->alternate) {
        PyErr_SetString(PyExc_ValueError,
                        "Alternate form (#) not allowed in string format "
                        "specifier");
        return -1;
    }

    /* '=' alignment not allowed on strings */
    if (format->align == '=') {
        PyErr_SetString(PyExc_ValueError,
                        "'=' alignment not allowed "
                        "in string format specifier");
        return -1;
    }

    /* Fast path: neither padding nor truncation is needed. */
    if ((format->width == -1 || format->width <= nchars)
        && (format->precision == -1 || format->precision >= nchars)) {
        return _PyUnicodeWriter_WriteStr(writer, value);
    }

    /* if precision is specified, output no more than format.precision
       characters */
    if (format->precision >= 0 && nchars >= format->precision)
        nchars = format->precision;

    calc_padding(nchars, format->width, format->align,
                 &pad_left, &pad_right, &out_width);

    /* Work out the widest character the output will contain. */
    maxchar = writer->maxchar;
    if (pad_left != 0 || pad_right != 0)
        maxchar = Py_MAX(maxchar, format->fill_char);
    if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
        Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, nchars);
        maxchar = Py_MAX(maxchar, valmaxchar);
    }

    /* allocate the resulting string */
    if (_PyUnicodeWriter_Prepare(writer, out_width, maxchar) == -1)
        return -1;

    /* Write into that space. First the padding. */
    if (fill_padding(writer, nchars, format->fill_char,
                     pad_left, pad_right) == -1)
        return -1;

    /* Then the source string. */
    if (nchars) {
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
                                      value, 0, nchars);
    }
    /* Step past the copied characters and the right padding. */
    writer->pos += (nchars + pad_right);
    return 0;
}
/* Compute the widths of all the parts that make up a formatted number and
   store them in *spec.  *maxchar is raised to cover the grouping, fill and
   decimal-point characters that will be written.  Returns the total number
   of characters the formatted number occupies.

   not all fields of format are used.  for example, precision is
   unused.  should this take discrete params in order to be more clear
   about what it does?  or is passing a single format parameter easier
   and more efficient enough to justify a little obfuscation? */
static Py_ssize_t
calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
                   Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
                   Py_ssize_t n_end, Py_ssize_t n_remainder,
                   int has_decimal, const LocaleInfo *locale,
                   const InternalFormatSpec *format, Py_UCS4 *maxchar)
{
    Py_ssize_t n_non_digit_non_padding;
    Py_ssize_t n_padding;

    /* Digits are what is left of [n_start, n_end) once the remainder and
       the decimal point (if any) are taken out. */
    spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
    spec->n_lpadding = 0;
    spec->n_prefix = n_prefix;
    spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
    spec->n_remainder = n_remainder;
    spec->n_spadding = 0;
    spec->n_rpadding = 0;
    spec->sign = '\0';
    spec->n_sign = 0;

    /* the output will look like:
       |                                                                                         |
       | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
       |                                                                                         |

       sign is computed from format->sign and the actual
       sign of the number

       prefix is given (it's for the '0x' prefix)

       digits is already known

       the total width is either given, or computed from the
       actual digits

       only one of lpadding, spadding, and rpadding can be non-zero,
       and it's calculated from the width and other fields
    */

    /* compute the various parts we're going to write */
    switch (format->sign) {
    case '+':
        /* always put a + or - */
        spec->n_sign = 1;
        spec->sign = (sign_char == '-' ? '-' : '+');
        break;
    case ' ':
        /* a space for non-negative numbers, '-' for negative ones */
        spec->n_sign = 1;
        spec->sign = (sign_char == '-' ? '-' : ' ');
        break;
    default:
        /* Not specified, or the default (-) */
        if (sign_char == '-') {
            spec->n_sign = 1;
            spec->sign = '-';
        }
    }

    /* The number of chars used for non-digits and non-padding. */
    n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
        spec->n_remainder;

    /* min_width can go negative, that's okay. format->width == -1 means
       we don't care. */
    if (format->fill_char == '0' && format->align == '=')
        spec->n_min_width = format->width - n_non_digit_non_padding;
    else
        spec->n_min_width = 0;

    if (spec->n_digits == 0)
        /* This case only occurs when using 'c' formatting, we need
           to special case it because the grouping code always wants
           to have at least one character. */
        spec->n_grouped_digits = 0;
    else {
        Py_UCS4 grouping_maxchar;
        /* Called with NULL buffers here: only the resulting length and the
           largest grouping character are taken from the call. */
        spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
            NULL, 0,
            0, NULL,
            spec->n_digits, spec->n_min_width,
            locale->grouping, locale->thousands_sep, &grouping_maxchar);
        *maxchar = Py_MAX(*maxchar, grouping_maxchar);
    }

    /* Given the desired width and the total of digit and non-digit
       space we consume, see if we need any padding. format->width can
       be negative (meaning no padding), but this code still works in
       that case. */
    n_padding = format->width -
                        (n_non_digit_non_padding + spec->n_grouped_digits);
    if (n_padding > 0) {
        /* Some padding is needed. Determine if it's left, space, or right. */
        switch (format->align) {
        case '<':
            spec->n_rpadding = n_padding;
            break;
        case '^':
            /* Centered: an odd leftover character goes to the right. */
            spec->n_lpadding = n_padding / 2;
            spec->n_rpadding = n_padding - spec->n_lpadding;
            break;
        case '=':
            spec->n_spadding = n_padding;
            break;
        case '>':
            spec->n_lpadding = n_padding;
            break;
        default:
            /* Shouldn't get here, but treat it as '>' */
            spec->n_lpadding = n_padding;
            assert(0);
            break;
        }
    }

    /* The fill character only matters if some padding is written. */
    if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
        *maxchar = Py_MAX(*maxchar, format->fill_char);

    if (spec->n_decimal)
        *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));

    return spec->n_lpadding + spec->n_sign + spec->n_prefix +
        spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
        spec->n_remainder + spec->n_rpadding;
}
/* Calculate new record length or append field to record.  Return new
 * record length.
 *
 * With copy_phase == 0 nothing is written: the characters are only counted
 * and *quoted is set if the field turns out to need quoting.  With
 * copy_phase != 0 the characters are stored in self->rec, which must
 * already be large enough.  self->rec_len itself is not modified.
 *
 * Returns -1 with an exception set if a character needs escaping but the
 * dialect has no escapechar, or if an empty field would have to be quoted
 * under QUOTE_NONE.
 */
static Py_ssize_t
join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
                 Py_ssize_t field_len, int quote_empty, int *quoted,
                 int copy_phase)
{
    DialectObj *dialect = self->dialect;
    /* Same type as field_len, so the index cannot overflow on fields longer
       than INT_MAX characters. */
    Py_ssize_t i;
    Py_ssize_t rec_len;

/* Count one character and, in the copy phase, also store it. */
#define ADDCH(c) \
    do {\
        if (copy_phase) \
            self->rec[rec_len] = c;\
        rec_len++;\
    } while(0)

    rec_len = self->rec_len;

    /* If this is not the first field we need a field separator */
    if (self->num_fields > 0)
        ADDCH(dialect->delimiter);

    /* Handle preceding quote */
    if (copy_phase && *quoted)
        ADDCH(dialect->quotechar);

    /* Copy/count field data */
    /* If field is null just pass over */
    for (i = 0; field_data && (i < field_len); i++) {
        Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
        int want_escape = 0;

        /* Special characters: delimiter, escapechar, quotechar and any
           character occurring in the line terminator. */
        if (c == dialect->delimiter ||
            c == dialect->escapechar ||
            c == dialect->quotechar  ||
            PyUnicode_FindChar(
                dialect->lineterminator, c, 0,
                PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
            if (dialect->quoting == QUOTE_NONE)
                want_escape = 1;
            else {
                if (c == dialect->quotechar) {
                    if (dialect->doublequote)
                        ADDCH(dialect->quotechar);
                    else
                        want_escape = 1;
                }
                if (!want_escape)
                    *quoted = 1;
            }
            if (want_escape) {
                if (!dialect->escapechar) {
                    PyErr_Format(error_obj,
                                 "need to escape, but no escapechar set");
                    return -1;
                }
                ADDCH(dialect->escapechar);
            }
        }
        /* Copy field character into record buffer.
         */
        ADDCH(c);
    }

    /* If field is empty check if it needs to be quoted.
     */
    if (i == 0 && quote_empty) {
        if (dialect->quoting == QUOTE_NONE) {
            PyErr_Format(error_obj,
                "single empty field record must be quoted");
            return -1;
        }
        else
            *quoted = 1;
    }

    if (*quoted) {
        if (copy_phase)
            ADDCH(dialect->quotechar);
        else
            rec_len += 2;  /* room for the opening and closing quote */
    }
    return rec_len;
#undef ADDCH
}
/* Turn the parser error described by *err into a pending Python exception
 * and release err->text.
 *
 * For most error codes the exception value is the tuple
 *     (message, (filename, lineno, offset, text))
 * raised as SyntaxError, IndentationError or TabError depending on the code.
 * E_ERROR raises nothing here, E_INTR raises KeyboardInterrupt unless an
 * exception is already pending, and E_NOMEM raises MemoryError.
 * In every case err->text is freed and reset to NULL before returning.
 */
static void
err_input(perrdetail *err)
{
    PyObject *errtype = PyExc_SyntaxError;
    PyObject *errtext;
    PyObject *location;
    PyObject *exc_value = NULL;
    PyObject *msg_obj = NULL;
    const char *msg = NULL;
    int offset = err->offset;

    switch (err->error) {
    case E_ERROR:
        goto cleanup;
    case E_SYNTAX:
        /* Indentation problems get their own exception type; everything
           else stays a plain SyntaxError. */
        if (err->expected == INDENT) {
            errtype = PyExc_IndentationError;
            msg = "expected an indented block";
        }
        else if (err->token == INDENT) {
            errtype = PyExc_IndentationError;
            msg = "unexpected indent";
        }
        else if (err->token == DEDENT) {
            errtype = PyExc_IndentationError;
            msg = "unexpected unindent";
        }
        else if (err->expected == NOTEQUAL) {
            msg = "with Barry as BDFL, use '<>' instead of '!='";
        }
        else {
            msg = "invalid syntax";
        }
        break;
    case E_TOKEN:
        msg = "invalid token";
        break;
    case E_EOFS:
        msg = "EOF while scanning triple-quoted string literal";
        break;
    case E_EOLS:
        msg = "EOL while scanning string literal";
        break;
    case E_INTR:
        if (!PyErr_Occurred()) {
            PyErr_SetNone(PyExc_KeyboardInterrupt);
        }
        goto cleanup;
    case E_NOMEM:
        PyErr_NoMemory();
        goto cleanup;
    case E_EOF:
        msg = "unexpected EOF while parsing";
        break;
    case E_TABSPACE:
        errtype = PyExc_TabError;
        msg = "inconsistent use of tabs and spaces in indentation";
        break;
    case E_OVERFLOW:
        msg = "expression too long";
        break;
    case E_DEDENT:
        errtype = PyExc_IndentationError;
        msg = "unindent does not match any outer indentation level";
        break;
    case E_TOODEEP:
        errtype = PyExc_IndentationError;
        msg = "too many levels of indentation";
        break;
    case E_DECODE: {
        /* Use the text of the pending exception, if any, as the message
           and drop that exception. */
        PyObject *exc_type, *exc_val, *exc_tb;
        PyErr_Fetch(&exc_type, &exc_val, &exc_tb);
        msg = "unknown decode error";
        if (exc_val != NULL) {
            msg_obj = PyObject_Str(exc_val);
        }
        Py_XDECREF(exc_type);
        Py_XDECREF(exc_val);
        Py_XDECREF(exc_tb);
        break;
    }
    case E_LINECONT:
        msg = "unexpected character after line continuation character";
        break;
    case E_IDENTIFIER:
        msg = "invalid character in identifier";
        break;
    case E_BADSINGLE:
        msg = "multiple statements found while compiling a single statement";
        break;
    default:
        fprintf(stderr, "error=%d\n", err->error);
        msg = "unknown parsing error";
        break;
    }

    /* err->text may not be UTF-8 in case of decoding errors.
       Explicitly convert to an object. */
    if (err->text == NULL) {
        Py_INCREF(Py_None);
        errtext = Py_None;
    }
    else {
        /* Decode only the part before the error first: its length in code
           points becomes the reported offset. */
        errtext = PyUnicode_DecodeUTF8(err->text, err->offset, "replace");
        if (errtext != NULL) {
            Py_ssize_t text_len = strlen(err->text);
            offset = (int)PyUnicode_GET_LENGTH(errtext);
            if (text_len != err->offset) {
                /* There is more text after the error: decode all of it. */
                Py_DECREF(errtext);
                errtext = PyUnicode_DecodeUTF8(err->text, text_len,
                                               "replace");
            }
        }
    }

    /* "N" hands the errtext reference over to the tuple. */
    location = Py_BuildValue("(OiiN)", err->filename,
                             err->lineno, offset, errtext);
    if (location != NULL) {
        exc_value = (msg_obj != NULL)
            ? Py_BuildValue("(OO)", msg_obj, location)
            : Py_BuildValue("(sO)", msg, location);
        Py_DECREF(location);
    }
    PyErr_SetObject(errtype, exc_value);
    Py_XDECREF(exc_value);

cleanup:
    Py_XDECREF(msg_obj);
    if (err->text != NULL) {
        PyObject_FREE(err->text);
        err->text = NULL;
    }
}
/* Write line number `lineno` of the source file `filename` to the file-like
 * object `f`, preceded by `indent` spaces and followed by a newline.  The
 * line's own leading spaces, tabs and form feeds are removed first.
 *
 * Return value:
 *    0  if the line was written, or if nothing could be shown because
 *       `filename` is NULL, the opened file has no usable file descriptor,
 *       it cannot be rewound or wrapped in a TextIOWrapper, or the value
 *       read is not a str; no exception is left pending by these paths.
 *   -1  if the io module cannot be imported, the file cannot be opened or
 *       found, or the requested line cannot be read.
 *   otherwise the non-zero result of the failing write to `f`.
 */
int
_Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent)
{
    int err = 0;
    int fd;
    int i;
    char *found_encoding;
    char *encoding;
    PyObject *io;
    PyObject *binary;
    PyObject *fob = NULL;
    PyObject *lineobj = NULL;
    PyObject *res;
    char buf[MAXPATHLEN+1];
    int kind;
    void *data;
    Py_ssize_t line_len;
    Py_ssize_t skip;

    /* open the file */
    if (filename == NULL)
        return 0;

    io = PyImport_ImportModuleNoBlock("io");
    if (io == NULL)
        return -1;
    binary = _PyObject_CallMethodId(io, &PyId_open, "Os", filename, "rb");

    if (binary == NULL) {
        /* Opening the name as given failed: fall back to the source file
           lookup helper. */
        PyErr_Clear();
        binary = _Py_FindSourceFile(filename, buf, sizeof(buf), io);
        if (binary == NULL) {
            Py_DECREF(io);
            return -1;
        }
    }

    /* use the right encoding to decode the file as unicode */
    fd = PyObject_AsFileDescriptor(binary);
    if (fd < 0) {
        /* Returning 0 means "nothing to show", so the exception raised by
           PyObject_AsFileDescriptor() must not stay pending. */
        PyErr_Clear();
        Py_DECREF(io);
        Py_DECREF(binary);
        return 0;
    }
    found_encoding = PyTokenizer_FindEncodingFilename(fd, filename);
    if (found_encoding == NULL)
        PyErr_Clear();
    encoding = (found_encoding != NULL) ? found_encoding : "utf-8";
    /* Reset position */
    if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
        Py_DECREF(io);
        Py_DECREF(binary);
        PyMem_FREE(found_encoding);
        return 0;
    }
    fob = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "Os", binary, encoding);
    Py_DECREF(io);
    Py_DECREF(binary);
    PyMem_FREE(found_encoding);

    if (fob == NULL) {
        PyErr_Clear();
        return 0;
    }

    /* get the line number lineno */
    for (i = 0; i < lineno; i++) {
        Py_XDECREF(lineobj);
        lineobj = PyFile_GetLine(fob, -1);
        if (!lineobj) {
            /* Discard the read error so that close() below is not called
               with an exception pending; the failure is reported through
               the -1 return value only. */
            PyErr_Clear();
            err = -1;
            break;
        }
    }
    res = _PyObject_CallMethodId(fob, &PyId_close, "");
    if (res)
        Py_DECREF(res);
    else
        PyErr_Clear();
    Py_DECREF(fob);
    if (!lineobj || !PyUnicode_Check(lineobj)) {
        Py_XDECREF(lineobj);
        return err;
    }

    /* remove the indentation of the line */
    kind = PyUnicode_KIND(lineobj);
    data = PyUnicode_DATA(lineobj);
    line_len = PyUnicode_GET_LENGTH(lineobj);
    for (skip = 0; skip < line_len; skip++) {
        Py_UCS4 ch = PyUnicode_READ(kind, data, skip);
        if (ch != ' ' && ch != '\t' && ch != '\014')
            break;
    }
    if (skip) {
        PyObject *truncated;
        truncated = PyUnicode_Substring(lineobj, skip, line_len);
        if (truncated) {
            Py_DECREF(lineobj);
            lineobj = truncated;
        } else {
            /* Best effort: show the line with its indentation intact. */
            PyErr_Clear();
        }
    }

    /* Write some spaces before the line.  buf holds exactly 10 spaces and
       the indentation is emitted in chunks of at most 10; the last chunk
       is shortened by moving the terminator. */
    memset(buf, ' ', 10);
    buf[10] = '\0';
    assert (strlen(buf) == 10);
    while (indent > 0) {
        if (indent < 10)
            buf[indent] = '\0';
        err = PyFile_WriteString(buf, f);
        if (err != 0)
            break;
        indent -= 10;
    }

    /* finally display the line */
    if (err == 0)
        err = PyFile_WriteObject(lineobj, f, Py_PRINT_RAW);
    Py_DECREF(lineobj);
    if  (err == 0)
        err = PyFile_WriteString("\n", f);
    return err;
}
/* Calculate new record length or append field to record.  Return new
 * record length, or -1 with an exception set on error.
 *
 * self        writer whose dialect, rec buffer, rec_len and num_fields are
 *             read; self->rec is written to only when copy_phase is true.
 * field_kind, field_data, field_len
 *             the field's characters, read with PyUnicode_READ();
 *             field_data may be NULL, in which case no characters are
 *             processed.
 * quoted      in/out flag: set to 1 when the field turns out to need
 *             quoting; when already set on entry in the copy phase, the
 *             opening quote is emitted.
 * copy_phase  false: only count characters (raising MemoryError if the
 *             count would exceed PY_SSIZE_T_MAX); true: also store them.
 *
 * NOTE(review): presumably the caller runs a counting pass first, sizes
 * self->rec from the result and then runs the copy pass -- confirm against
 * the caller.
 */
static Py_ssize_t
join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
                 Py_ssize_t field_len, int *quoted,
                 int copy_phase)
{
    DialectObj *dialect = self->dialect;
    /* Same type as field_len, so long fields cannot overflow the index. */
    Py_ssize_t i;
    Py_ssize_t rec_len;

#define INCLEN \
    do {\
        if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
            goto overflow; \
        } \
        rec_len++; \
    } while(0)

#define ADDCH(c) \
    do {\
        if (copy_phase) \
            self->rec[rec_len] = c;\
        INCLEN;\
    } while(0)

    rec_len = self->rec_len;

    /* If this is not the first field we need a field separator */
    if (self->num_fields > 0)
        ADDCH(dialect->delimiter);

    /* Handle preceding quote */
    if (copy_phase && *quoted)
        ADDCH(dialect->quotechar);

    /* Copy/count field data */
    /* If field is null just pass over */
    for (i = 0; field_data && (i < field_len); i++) {
        Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
        int want_escape = 0;

        /* Special characters: delimiter, escapechar, quotechar and any
         * character that occurs in the line terminator. */
        if (c == dialect->delimiter ||
            c == dialect->escapechar ||
            c == dialect->quotechar  ||
            PyUnicode_FindChar(
                dialect->lineterminator, c, 0,
                PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
            if (dialect->quoting == QUOTE_NONE)
                want_escape = 1;
            else {
                if (c == dialect->quotechar) {
                    /* Either double the quote character or escape it. */
                    if (dialect->doublequote)
                        ADDCH(dialect->quotechar);
                    else
                        want_escape = 1;
                }
                if (!want_escape)
                    *quoted = 1;
            }
            if (want_escape) {
                if (!dialect->escapechar) {
                    PyErr_Format(_csvstate_global->error_obj,
                                 "need to escape, but no escapechar set");
                    return -1;
                }
                ADDCH(dialect->escapechar);
            }
        }
        /* Copy field character into record buffer.
         */
        ADDCH(c);
    }

    if (*quoted) {
        if (copy_phase)
            ADDCH(dialect->quotechar);
        else {
            INCLEN; /* starting quote */
            INCLEN; /* ending quote */
        }
    }
    return rec_len;

  overflow:
    PyErr_NoMemory();
    return -1;
#undef ADDCH
#undef INCLEN
}
/* Read one line from the file-like object `f` by calling its readline()
 * method and return it as a new reference, or NULL with an exception set.
 *
 *   n > 0   readline(n) is called and the result returned as is.
 *   n == 0  readline() is called and the result returned as is.
 *   n < 0   readline() is called; an empty result raises EOFError and a
 *           single trailing '\n' is removed from the result.
 *
 * A result that is neither bytes nor str raises TypeError.
 */
PyObject *
PyFile_GetLine(PyObject *f, int n)
{
    _Py_IDENTIFIER(readline);
    PyObject *readline_func;
    PyObject *call_args;
    PyObject *result;

    if (f == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    readline_func = _PyObject_GetAttrId(f, &PyId_readline);
    if (readline_func == NULL)
        return NULL;

    /* A size argument is passed only for a positive n. */
    call_args = (n <= 0) ? PyTuple_New(0) : Py_BuildValue("(i)", n);
    if (call_args == NULL) {
        Py_DECREF(readline_func);
        return NULL;
    }
    result = PyEval_CallObject(readline_func, call_args);
    Py_DECREF(readline_func);
    Py_DECREF(call_args);
    if (result == NULL)
        return NULL;

    if (!PyBytes_Check(result) && !PyUnicode_Check(result)) {
        Py_DECREF(result);
        PyErr_SetString(PyExc_TypeError,
                   "object.readline() returned non-string");
        return NULL;
    }

    /* Only a negative n asks for EOF detection and newline stripping. */
    if (n >= 0)
        return result;

    if (PyBytes_Check(result)) {
        char *bytes = PyBytes_AS_STRING(result);
        Py_ssize_t size = PyBytes_GET_SIZE(result);

        if (size == 0) {
            Py_DECREF(result);
            PyErr_SetString(PyExc_EOFError,
                            "EOF when reading a line");
            return NULL;
        }
        if (bytes[size-1] == '\n') {
            if (result->ob_refcnt == 1) {
                /* Sole owner: shrink in place.  On failure the resize
                   call leaves result NULL, which is returned below. */
                _PyBytes_Resize(&result, size-1);
            }
            else {
                PyObject *stripped = PyBytes_FromStringAndSize(bytes, size-1);
                Py_DECREF(result);
                result = stripped;
            }
        }
    }
    else {
        /* str result */
        Py_ssize_t size = PyUnicode_GET_LENGTH(result);

        if (size == 0) {
            Py_DECREF(result);
            PyErr_SetString(PyExc_EOFError,
                            "EOF when reading a line");
            return NULL;
        }
        if (PyUnicode_READ_CHAR(result, size-1) == '\n') {
            PyObject *stripped = PyUnicode_Substring(result, 0, size-1);
            Py_DECREF(result);
            result = stripped;
        }
    }
    return result;
}