/* Interleave `to_s` with the characters of `self`.
 *
 * count = min(maxcount, len(self) + 1) copies of `to_s` are written: one at
 * the very start and one after each of the first count-1 characters of
 * `self`; the remaining characters are copied unchanged.  For example
 * self "ab", to_s "-" and a large maxcount yield "-a-b-".
 *
 * Returns a new string, or NULL with OverflowError set when the result
 * length does not fit in Py_ssize_t (or NULL when the allocation fails).
 *
 * NOTE(review): callers are presumably expected to pass maxcount >= 1
 * (mapping "unlimited" to PY_SSIZE_T_MAX beforehand) -- confirm.  A
 * non-positive maxcount is treated as "no insertion".
 */
replace_interleave(PyStringObject *self,
                   const char *to_s, Py_ssize_t to_len,
                   Py_ssize_t maxcount)
{
    char *self_s, *result_s;
    Py_ssize_t self_len, result_len;
    Py_ssize_t count, i;
    PyStringObject *result;

    self_len = PyString_GET_SIZE(self);

    /* 1 at the end plus 1 after every character:
       count = min(maxcount, self_len + 1), written so that
       self_len + 1 is only evaluated when it cannot overflow. */
    if (maxcount <= self_len)
        count = maxcount;
    else
        count = self_len + 1;

    if (count <= 0) {
        /* Nothing to insert.  Return an unmodified copy rather than
           laying down `to_s` in a buffer that has no room for it. */
        result = (PyStringObject *)
            PyString_FromStringAndSize(PyString_AS_STRING(self), self_len);
        return result;
    }

    /* Check for overflow *before* multiplying; signed overflow is
       undefined behaviour, and dividing by count (> 0) instead of
       to_len also keeps to_len == 0 safe. */
    /*   result_len = count * to_len + self_len; */
    if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
        PyErr_SetString(PyExc_OverflowError,
                        "replace string is too long");
        return NULL;
    }
    result_len = count * to_len + self_len;

    if (! (result = (PyStringObject *)
                    PyString_FromStringAndSize(NULL, result_len)) )
        return NULL;

    self_s = PyString_AS_STRING(self);
    result_s = PyString_AS_STRING(result);

    /* TODO: special case single character, which doesn't need memcpy */

    /* Lay the first one down (guaranteed this will occur) */
    Py_MEMCPY(result_s, to_s, to_len);
    result_s += to_len;
    count -= 1;

    /* One original character followed by one copy of `to_s`. */
    for (i = 0; i < count; i++) {
        *result_s++ = *self_s++;
        Py_MEMCPY(result_s, to_s, to_len);
        result_s += to_len;
    }

    /* Copy the rest of the original string */
    Py_MEMCPY(result_s, self_s, self_len - i);

    return result;
}
PyObject* string_repeat(register PyStringObject *a, register Py_ssize_t n) { register Py_ssize_t i; register Py_ssize_t j; register Py_ssize_t size; register PyStringObject *op; size_t nbytes; if (n < 0) n = 0; /* watch out for overflows: the size can overflow int, * and the # of bytes needed can overflow size_t */ size = Py_SIZE(a) * n; if (n && size / n != Py_SIZE(a)) { PyErr_SetString(PyExc_OverflowError, "repeated string is too long"); return NULL; } if (size == Py_SIZE(a) && PyString_CheckExact(a)) { Py_INCREF(a); return (PyObject *)a; } nbytes = (size_t)size; if (nbytes + PyStringObject_SIZE <= nbytes) { PyErr_SetString(PyExc_OverflowError, "repeated string is too long"); return NULL; } op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes); if (op == NULL) return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; op->ob_sstate = SSTATE_NOT_INTERNED; op->ob_sval[size] = '\0'; if (Py_SIZE(a) == 1 && n > 0) { memset(op->ob_sval, a->ob_sval[0] , n); return (PyObject *) op; } i = 0; if (i < size) { Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a)); i = Py_SIZE(a); } while (i < size) { j = (i <= size-i) ? i : size-i; Py_MEMCPY(op->ob_sval+i, op->ob_sval, j); i += j; } return (PyObject *) op; }
/* Same-length substring replacement.
 *
 * Searches `self` for `from_s` (length `from_len`).  Without a match the
 * result of return_self(self) is returned.  Otherwise a same-sized copy of
 * `self` is made and each match in the copy is overwritten with the first
 * `from_len` bytes of `to_s`.  The first match is always replaced; further
 * matches are replaced while the pre-decremented `maxcount` stays positive.
 * `to_len` is not read here.
 *
 * Returns NULL if the copy cannot be allocated.
 */
replace_substring_in_place(PyStringObject *self,
                           const char *from_s, Py_ssize_t from_len,
                           const char *to_s, Py_ssize_t to_len,
                           Py_ssize_t maxcount)
{
    PyStringObject *result;
    char *src, *dst, *cursor, *limit;
    Py_ssize_t src_len, hit;

    src = PyString_AS_STRING(self);
    src_len = PyString_GET_SIZE(self);

    hit = stringlib_find(src, src_len, from_s, from_len, 0);
    if (hit == -1) {
        /* Nothing to replace: hand back the original. */
        return return_self(self);
    }

    /* Work on a private copy of the same size. */
    result = (PyStringObject *) PyString_FromStringAndSize(NULL, src_len);
    if (result == NULL)
        return NULL;
    dst = PyString_AS_STRING(result);
    Py_MEMCPY(dst, src, src_len);
    limit = dst + src_len;

    /* `hit` is always relative to `cursor`; overwrite the match, step
       past it, then look for the next one while the budget allows. */
    cursor = dst;
    do {
        cursor += hit;
        Py_MEMCPY(cursor, to_s, from_len);
        cursor += from_len;

        if (--maxcount <= 0)
            break;
        hit = stringlib_find(cursor, limit - cursor, from_s, from_len, 0);
    } while (hit != -1);

    return result;
}
/* Remove occurrences of `from_s` (length `from_len`) from `self`.
 *
 * The number of removals is whatever stringlib_count() reports for the
 * given `maxcount`.  With zero matches the result of return_self(self) is
 * returned; otherwise a new, shorter string is built from the pieces
 * between the matches.  Returns NULL if the allocation fails.
 */
replace_delete_substring(PyStringObject *self,
                         const char *from_s, Py_ssize_t from_len,
                         Py_ssize_t maxcount)
{
    PyStringObject *result;
    char *src, *rd, *stop, *wr;
    Py_ssize_t src_len, out_len;
    Py_ssize_t remaining, gap;

    src_len = PyString_GET_SIZE(self);
    src = PyString_AS_STRING(self);

    remaining = stringlib_count(src, src_len, from_s, from_len, maxcount);
    if (remaining == 0) {
        /* no matches */
        return return_self(self);
    }

    out_len = src_len - (remaining * from_len);
    assert (out_len>=0);

    result = (PyStringObject *) PyString_FromStringAndSize(NULL, out_len);
    if (result == NULL)
        return NULL;
    wr = PyString_AS_STRING(result);

    rd = src;
    stop = src + src_len;
    for (; remaining > 0; --remaining) {
        gap = stringlib_find(rd, stop - rd, from_s, from_len, 0);
        if (gap == -1)
            break;
        /* Keep the text in front of the match, skip the match itself. */
        Py_MEMCPY(wr, rd, gap);
        wr += gap;
        rd += gap + from_len;
    }

    /* Whatever follows the last removed match is kept verbatim. */
    Py_MEMCPY(wr, rd, stop - rd);
    return result;
}
/* Remove occurrences of the single character `from_c` from `self`.
 *
 * The number of removals is whatever countchar() reports for the given
 * `maxcount`.  With zero matches the result of return_self(self) is
 * returned; otherwise a new string that is `count` bytes shorter is
 * built.  Returns NULL if the allocation fails.
 */
replace_delete_single_character(PyStringObject *self,
                                char from_c, Py_ssize_t maxcount)
{
    PyStringObject *result;
    char *src, *rd, *stop, *hit, *wr;
    Py_ssize_t src_len, out_len;
    Py_ssize_t remaining, keep;

    src_len = PyString_GET_SIZE(self);
    src = PyString_AS_STRING(self);

    remaining = countchar(src, src_len, from_c, maxcount);
    if (remaining == 0) {
        return return_self(self);
    }

    /* Each removal drops exactly one byte. */
    out_len = src_len - remaining;
    assert(out_len>=0);

    result = (PyStringObject *) PyString_FromStringAndSize(NULL, out_len);
    if (result == NULL)
        return NULL;
    wr = PyString_AS_STRING(result);

    rd = src;
    stop = src + src_len;
    for (; remaining > 0; --remaining) {
        hit = findchar(rd, stop - rd, from_c);
        if (hit == NULL)
            break;
        /* Copy the run before the character, then step over it. */
        keep = hit - rd;
        Py_MEMCPY(wr, rd, keep);
        wr += keep;
        rd = hit + 1;
    }

    /* Tail after the last removed character. */
    Py_MEMCPY(wr, rd, stop - rd);
    return result;
}
/* Copy `len` bytes from `cptr` into `result`, then replace every byte that
 * Py_ISLOWER() accepts with its Py_TOUPPER() counterpart.  Other bytes are
 * left as copied.
 */
void _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len)
{
    char *out;
    char *stop;

    Py_MEMCPY(result, cptr, len);

    stop = result + len;
    for (out = result; out < stop; ++out) {
        /* Mask first so the classification macros see a byte value. */
        int ch = Py_CHARMASK(*out);
        if (Py_ISLOWER(ch)) {
            *out = Py_TOUPPER(ch);
        }
    }
}
/* Attribute getter that builds an id string for `obj`.
 *
 * generate_id(Node(obj), buffer) is called with a fresh, empty list; a
 * negative return value is treated as failure.  Afterwards:
 *   - if the list holds exactly one item, that item is returned as is
 *     (with a new reference);
 *   - otherwise every item must be a string and the items are
 *     concatenated into one new string (an empty list gives "").
 *
 * Returns NULL with an exception set on failure: TypeError for a
 * non-string item, OverflowError when the total length does not fit in
 * Py_ssize_t, or whatever generate_id / the allocators raised.
 * `arg` is not used.
 */
static PyObject *get_id(PyObject *obj, void *arg)
{
    PyObject *buffer, *item, *result = NULL;
    Py_ssize_t i, n, size, item_size;
    char *p;

    buffer = PyList_New(0);
    if (buffer == NULL)
        return NULL;

    if (generate_id(Node(obj), buffer) < 0)
        goto finally;

    n = PyList_GET_SIZE(buffer);
    if (n == 1) {
        /* Single fragment: no copy needed. */
        result = PyList_GET_ITEM(buffer, 0);
        Py_INCREF(result);
    }
    else {
        /* First pass: validate the items and total up the length. */
        for (size = 0, i = 0; i < n; i++) {
            item = PyList_GET_ITEM(buffer, i);
            if (!PyString_Check(item)) {
                PyErr_Format(PyExc_TypeError,
                             "sequence item %zd: expected string, %s found",
                             i, item->ob_type->tp_name);
                goto finally;
            }
            item_size = PyString_GET_SIZE(item);
            /* Check before adding: signed overflow is undefined
               behaviour and cannot be detected afterwards. */
            if (item_size > PY_SSIZE_T_MAX - size) {
                PyErr_SetString(PyExc_OverflowError,
                                "join() result is too long for a Python string");
                goto finally;
            }
            size += item_size;
        }

        result = PyString_FromStringAndSize(NULL, size);
        if (result == NULL)
            goto finally;

        /* Second pass: copy the fragments back to back. */
        p = PyString_AS_STRING(result);
        for (i = 0; i < n; i++) {
            item = PyList_GET_ITEM(buffer, i);
            item_size = PyString_GET_SIZE(item);
            Py_MEMCPY(p, PyString_AS_STRING(item), item_size);
            p += item_size;
        }
    }

finally:
    Py_DECREF(buffer);
    return result;
}
/* Same-length single-character replacement.
 *
 * If `from_c` does not occur in `self`, the result of return_self(self)
 * is returned.  Otherwise a same-sized copy is made and occurrences of
 * `from_c` in the copy are overwritten with `to_c`.  The first occurrence
 * is always replaced; further ones are replaced while the pre-decremented
 * `maxcount` stays positive.
 *
 * Returns NULL if the copy cannot be allocated.
 */
replace_single_character_in_place(PyStringObject *self,
                                  char from_c, char to_c,
                                  Py_ssize_t maxcount)
{
    PyStringObject *result;
    char *src, *dst, *cursor, *limit, *hit;
    Py_ssize_t src_len;

    src = PyString_AS_STRING(self);
    src_len = PyString_GET_SIZE(self);

    hit = findchar(src, src_len, from_c);
    if (hit == NULL) {
        /* Nothing to replace: hand back the original. */
        return return_self(self);
    }

    /* Work on a private copy of the same size. */
    result = (PyStringObject *) PyString_FromStringAndSize(NULL, src_len);
    if (result == NULL)
        return NULL;
    dst = PyString_AS_STRING(result);
    Py_MEMCPY(dst, src, src_len);
    limit = dst + src_len;

    /* Translate the first hit from the source into the copy, then keep
       patching and searching inside the copy. */
    hit = dst + (hit - src);
    do {
        *hit = to_c;
        cursor = hit + 1;

        if (--maxcount <= 0)
            break;
        hit = findchar(cursor, limit - cursor, from_c);
    } while (hit != NULL);

    return result;
}
/* Write an upper-cased copy of the `len` bytes at `cptr` into `result`.
 * The bytes are copied first; afterwards each byte for which Py_ISLOWER()
 * holds is replaced by Py_TOUPPER() of that byte.
 */
void _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len)
{
    Py_ssize_t pos = 0;

    Py_MEMCPY(result, cptr, len);

    while (pos < len) {
        /* Classify on the masked value, not on a possibly signed char. */
        int ch = Py_CHARMASK(result[pos]);
        if (Py_ISLOWER(ch))
            result[pos] = Py_TOUPPER(ch);
        pos++;
    }
}
/* Sets an exception and returns NULL if out of memory. */ static const char * walk_here_statement(const char *start, const char *p, const char *end) { char *end_here, *temp_string; ++p; /* DEBUG("starting here processing for COMMAND for level 2 at p == '%.10s'", * p); */ if (p >= end) { fprintf(stderr, "bailing\n"); return p; } if ('<' == *p) { /* d2printf("correction, it's a third level here. Handing back to " * "command parsing\n"); */ return ++p; } while (p < end && (isspace(*p) || '-' == *p)) ++p; if ('\'' == *p || '"' == *p) { end_here = (char *)walk_statement_no_parsing(p + 1, end, *p); ++p; } else { end_here = (char *)walk_command_complex(start, p, end, ' ', SPACE_PARSING); if (!end_here) return NULL; } /* INFO("end_here=%.5s",end_here); */ temp_string = malloc(end_here -p + 1); if (!temp_string) { PyErr_NoMemory(); return NULL; } int here_len = end_here - p; Py_MEMCPY(temp_string, p, here_len); temp_string[here_len] = '\0'; /* d2printf("matched len('%zi')/'%s' for a here word\n", end_here - p, * temp_string); */ /* XXX watch this. potential for horkage. need to do the quote removal thing. this sucks. */ ++end_here; if (end_here >= end) { free(temp_string); return end_here; } end_here = (char *)bmh_search((unsigned char*)temp_string, (unsigned char*)end_here, end - end_here); while(end_here) { char *i = end_here + here_len; if (';' == *i || '\n' == *i || '\r' == *i) { i = end_here - 1; while (i != p && ('\t' == *i || ' ' == *i)) --i; if (i != p && '\n' == *i) break; } end_here = (char *)bmh_search((unsigned char*)temp_string, (unsigned char*)(end_here + here_len), end - end_here - here_len); } INFO("bmh returned %p", end_here); free(temp_string); if (!end_here) { return end; } /* d2printf("bmh = %.10s\n", end_here); */ return end_here + here_len; }
/* General substring replacement where the lengths may differ.
 *
 * Replaces the occurrences of `from_s` (length `from_len`) counted by
 * stringlib_count() for the given `maxcount` with `to_s` (length `to_len`).
 * With zero matches the result of return_self(self) is returned.
 *
 * Returns a new string, or NULL with OverflowError set when the result
 * length does not fit in Py_ssize_t (or NULL when the allocation fails).
 */
replace_substring(PyStringObject *self,
                  const char *from_s, Py_ssize_t from_len,
                  const char *to_s, Py_ssize_t to_len,
                  Py_ssize_t maxcount)
{
    char *self_s, *result_s;
    char *start, *end;
    Py_ssize_t self_len, result_len;
    Py_ssize_t count, offset;
    PyStringObject *result;

    self_s = PyString_AS_STRING(self);
    self_len = PyString_GET_SIZE(self);

    count = stringlib_count(self_s, self_len,
                            from_s, from_len,
                            maxcount);
    if (count == 0) {
        /* no matches, return unchanged */
        return return_self(self);
    }

    /* Check for overflow */
    /*    result_len = self_len + count * (to_len-from_len) */
    /* The test is done before multiplying (signed overflow is undefined
       behaviour) and divides by count, which is non-zero here, so
       to_len == from_len cannot cause a division by zero. */
    if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
        PyErr_SetString(PyExc_OverflowError,
                        "replace string is too long");
        return NULL;
    }
    result_len = self_len + count * (to_len - from_len);

    if ( (result = (PyStringObject *)
                   PyString_FromStringAndSize(NULL, result_len)) == NULL)
        return NULL;
    result_s = PyString_AS_STRING(result);

    start = self_s;
    end = self_s + self_len;
    while (count-- > 0) {
        offset = stringlib_find(start, end-start,
                                from_s, from_len,
                                0);
        if (offset == -1)
            break;
        /* copy the unchanged old text (possibly empty) then the 'to' */
        Py_MEMCPY(result_s, start, offset);
        result_s += offset;
        Py_MEMCPY(result_s, to_s, to_len);
        result_s += to_len;
        start += offset + from_len;
    }
    /* Copy the remainder of the remaining string */
    Py_MEMCPY(result_s, start, end-start);

    return result;
}
/* Replace occurrences of the single character `from_c` with the string
 * `to_s` (length `to_len`).
 *
 * The number of replacements is whatever countchar() reports for the given
 * `maxcount`.  With zero matches the result of return_self(self) is
 * returned.
 *
 * Returns a new string, or NULL with OverflowError set when the result
 * length does not fit in Py_ssize_t (or NULL when the allocation fails).
 */
replace_single_character(PyStringObject *self,
                         char from_c,
                         const char *to_s, Py_ssize_t to_len,
                         Py_ssize_t maxcount)
{
    char *self_s, *result_s;
    char *start, *next, *end;
    Py_ssize_t self_len, result_len;
    Py_ssize_t count;
    PyStringObject *result;

    self_s = PyString_AS_STRING(self);
    self_len = PyString_GET_SIZE(self);

    count = countchar(self_s, self_len, from_c, maxcount);
    if (count == 0) {
        /* no matches, return unchanged */
        return return_self(self);
    }

    /* use the difference between current and new, hence the "-1" */
    /*   result_len = self_len + count * (to_len-1)  */
    /* The test is done before multiplying (signed overflow is undefined
       behaviour) and divides by count, which is non-zero here, so
       to_len == 1 cannot cause a division by zero. */
    if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
        PyErr_SetString(PyExc_OverflowError,
                        "replace string is too long");
        return NULL;
    }
    result_len = self_len + count * (to_len - 1);

    if ( (result = (PyStringObject *)
                   PyString_FromStringAndSize(NULL, result_len)) == NULL)
        return NULL;
    result_s = PyString_AS_STRING(result);

    start = self_s;
    end = self_s + self_len;
    while (count-- > 0) {
        next = findchar(start, end-start, from_c);
        if (next == NULL)
            break;
        /* copy the unchanged old text (possibly empty) then the 'to' */
        Py_MEMCPY(result_s, start, next-start);
        result_s += (next-start);
        Py_MEMCPY(result_s, to_s, to_len);
        result_s += to_len;
        start = next + 1;
    }
    /* Copy the remainder of the remaining string */
    Py_MEMCPY(result_s, start, end-start);

    return result;
}
/* Join the items of `orig` using the string `self` as separator.
 *
 * `orig` is first converted with PySequence_Fast().  An empty sequence
 * yields "".  A one-item sequence whose item is an exact string (or exact
 * unicode) object yields that item itself.  If a unicode item is met while
 * scanning (and Py_USING_UNICODE is defined) the work is delegated to
 * PyUnicode_Join().  Any other non-string item raises TypeError, and a
 * total length above PY_SSIZE_T_MAX raises OverflowError.
 *
 * Returns a new reference, or NULL with an exception set.
 */
PyObject *string_join(PyStringObject *self, PyObject *orig)
{
    char *sep = PyString_AS_STRING(self);
    const Py_ssize_t seplen = PyString_GET_SIZE(self);
    PyObject *res = NULL;
    PyObject *seq, *item;
    Py_ssize_t seqlen = 0;
    Py_ssize_t i;
    size_t total = 0;
    char *out;

    seq = PySequence_Fast(orig, "");
    if (seq == NULL) {
        return NULL;
    }

    seqlen = PySequence_Size(seq);
    if (seqlen == 0) {
        res = PyString_FromString("");
        goto done;
    }
    if (seqlen == 1) {
        item = PySequence_Fast_GET_ITEM(seq, 0);
        if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
            Py_INCREF(item);
            res = item;
            goto done;
        }
    }

    /* Either there are two or more items, or the single item is an
     * instance of a subclass of the builtin types.
     * Pre-pass: compute the space needed (total), reject unsuitable
     * items, and hand over to the Unicode join where appropriate.
     */
    for (i = 0; i < seqlen; i++) {
        const size_t before = total;

        item = PySequence_Fast_GET_ITEM(seq, i);
        if (!PyString_Check(item)){
#ifdef Py_USING_UNICODE
            if (PyUnicode_Check(item)) {
                /* Defer to Unicode join.
                 * CAUTION:  There's no guarantee that the
                 * original sequence can be iterated over
                 * again, so we must pass seq here.
                 */
                res = PyUnicode_Join((PyObject *)self, seq);
                goto done;
            }
#endif
            PyErr_Format(PyExc_TypeError,
                         "sequence item %zd: expected string,"
                         " %.80s found",
                         i, Py_TYPE(item)->tp_name);
            goto done;
        }

        total += PyString_GET_SIZE(item);
        if (i != 0)
            total += seplen;
        /* A wrapped-around size_t shows up as a smaller value. */
        if (total < before || total > PY_SSIZE_T_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                            "join() result is too long for a Python string");
            goto done;
        }
    }

    /* Allocate result space. */
    res = PyString_FromStringAndSize((char*)NULL, total);
    if (res == NULL) {
        goto done;
    }

    /* Catenate everything: a separator goes in front of every item
     * except the first one.
     */
    out = PyString_AS_STRING(res);
    for (i = 0; i < seqlen; ++i) {
        size_t item_len;

        if (i > 0) {
            Py_MEMCPY(out, sep, seplen);
            out += seplen;
        }
        item = PySequence_Fast_GET_ITEM(seq, i);
        item_len = PyString_GET_SIZE(item);
        Py_MEMCPY(out, PyString_AS_STRING(item), item_len);
        out += item_len;
    }

done:
    /* Single exit: the fast-sequence reference is always released. */
    Py_DECREF(seq);
    return res;
}
/* Minimal '%'-style formatting for bytes objects.
 *
 * `format` must be a bytes object and `args` non-NULL, otherwise
 * PyErr_BadInternalCall() is raised.  `args` is either a tuple of
 * positional values, a mapping (used for "%(key)s" lookups), or a single
 * value.  Only two conversions are recognised:
 *     %%  -> a literal '%'
 *     %s  -> the argument, which must be an exact bytes object
 * Any other conversion character raises ValueError.
 *
 * Returns a new bytes object, or NULL with an exception set.  Every
 * failure path after a "%(key)" lookup releases the reference obtained
 * for the looked-up value.
 */
PyObject *
Bytes_Format(PyObject *format, PyObject *args)
{
    char *fmt, *res;
    Py_ssize_t arglen, argidx;
    Py_ssize_t reslen, rescnt, fmtcnt;
    int args_owned = 0;   /* set while `args` is a reference we own */
    PyObject *result;
    PyObject *dict = NULL;

    if (format == NULL || !Bytes_Check(format) || args == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }
    fmt = Bytes_AS_STRING(format);
    fmtcnt = Bytes_GET_SIZE(format);

    /* Start with some slack; the buffer is grown on demand and trimmed
       to the used size at the end. */
    reslen = rescnt = fmtcnt + 100;
    result = Bytes_FromStringAndSize((char *)NULL, reslen);
    if (result == NULL)
        return NULL;
    res = Bytes_AsString(result);

    if (PyTuple_Check(args)) {
        arglen = PyTuple_GET_SIZE(args);
        argidx = 0;
    }
    else {
        /* Single (non-tuple) argument. */
        arglen = -1;
        argidx = -2;
    }
    if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
        !PyObject_TypeCheck(args, &Bytes_Type))
        dict = args;

    while (--fmtcnt >= 0) {
        if (*fmt != '%') {
            /* Plain byte: copy it through, growing the buffer if full. */
            if (--rescnt < 0) {
                rescnt = fmtcnt + 100;
                reslen += rescnt;
                if (_Bytes_Resize(&result, reslen))
                    goto error_no_result;
                res = Bytes_AS_STRING(result) + reslen - rescnt;
                --rescnt;
            }
            *res++ = *fmt++;
        }
        else {
            /* Got a format specifier */
            /* `width` always starts at -1, so it ends up equal to `len`
               and the padding loop below writes nothing. */
            Py_ssize_t width = -1;
            int c = '\0';
            PyObject *v = NULL;
            PyObject *temp = NULL;
            char *pbuf;
            Py_ssize_t len;

            fmt++;
            if (*fmt == '(') {
                char *keystart;
                Py_ssize_t keylen;
                PyObject *key;
                int pcount = 1;

                if (dict == NULL) {
                    PyErr_SetString(PyExc_TypeError,
                                    "format requires a mapping");
                    goto error;
                }
                ++fmt;
                --fmtcnt;
                keystart = fmt;
                /* Skip over balanced parentheses */
                while (pcount > 0 && --fmtcnt >= 0) {
                    if (*fmt == ')')
                        --pcount;
                    else if (*fmt == '(')
                        ++pcount;
                    fmt++;
                }
                keylen = fmt - keystart - 1;
                if (fmtcnt < 0 || pcount > 0) {
                    PyErr_SetString(PyExc_ValueError,
                                    "incomplete format key");
                    goto error;
                }
                key = Text_FromUTF8AndSize(keystart, keylen);
                if (key == NULL)
                    goto error;
                /* Drop the value of a previous lookup before replacing it. */
                if (args_owned) {
                    Py_DECREF(args);
                    args_owned = 0;
                }
                args = PyObject_GetItem(dict, key);
                Py_DECREF(key);
                if (args == NULL) {
                    goto error;
                }
                args_owned = 1;
                arglen = -1;
                argidx = -2;
            }

            /* Fetch the conversion character, if any is left. */
            if (--fmtcnt >= 0) {
                c = *fmt++;
            }
            if (fmtcnt < 0) {
                PyErr_SetString(PyExc_ValueError,
                                "incomplete format");
                goto error;
            }
            if (c != '%') {
                v = getnextarg(args, arglen, &argidx);
                if (v == NULL)
                    goto error;
            }

            switch (c) {
            case '%':
                pbuf = "%";
                len = 1;
                break;
            case 's':
                /* only bytes! */
                if (!Bytes_CheckExact(v)) {
                    PyErr_Format(PyExc_ValueError,
                                 "only bytes values expected, got %s",
                                 Py_TYPE(v)->tp_name);
                    goto error;
                }
                temp = v;
                Py_INCREF(v);
                pbuf = Bytes_AS_STRING(temp);
                len = Bytes_GET_SIZE(temp);
                break;
            default:
                PyErr_Format(PyExc_ValueError,
                             "unsupported format character '%c' (0x%x) "
                             "at index " FORMAT_CODE_PY_SSIZE_T,
                             c, c,
                             (Py_ssize_t)(fmt - 1 - Bytes_AsString(format)));
                goto error;
            }

            if (width < len)
                width = len;
            if (rescnt < width) {
                reslen -= rescnt;
                rescnt = width + fmtcnt + 100;
                reslen += rescnt;
                if (reslen < 0) {
                    Py_XDECREF(temp);
                    PyErr_NoMemory();
                    goto error;
                }
                if (_Bytes_Resize(&result, reslen)) {
                    Py_XDECREF(temp);
                    goto error_no_result;
                }
                res = Bytes_AS_STRING(result) + reslen - rescnt;
            }
            Py_MEMCPY(res, pbuf, len);
            res += len;
            rescnt -= len;
            while (--width >= len) {
                --rescnt;
                *res++ = ' ';
            }
            if (dict && (argidx < arglen) && c != '%') {
                PyErr_SetString(PyExc_TypeError,
                                "not all arguments converted during string formatting");
                Py_XDECREF(temp);
                goto error;
            }
            Py_XDECREF(temp);
        } /* '%' */
    } /* until end */

    if (argidx < arglen && !dict) {
        PyErr_SetString(PyExc_TypeError,
                        "not all arguments converted during string formatting");
        goto error;
    }
    if (args_owned) {
        Py_DECREF(args);
    }
    /* Trim the unused slack. */
    if (_Bytes_Resize(&result, reslen - rescnt))
        return NULL;
    return result;

error:
    Py_DECREF(result);
    /* fall through */
error_no_result:
    /* Reached directly after a failed resize: `result` is deliberately
       left untouched there, exactly as the plain `return NULL` did
       before; only the owned `args` reference still needs releasing. */
    if (args_owned) {
        Py_DECREF(args);
    }
    return NULL;
}