/*
 * Lower-case the contents of a unicode object in place.
 *
 * Walks the self->length code units stored at self->str and replaces each
 * one with Py_UNICODE_TOLOWER() of itself.  A unit is only written back
 * when lower-casing actually changes it.
 */
static void fixlower(PyUnicodeObject *self)
{
    Py_UNICODE *cursor = self->str;
    int remaining;

    for (remaining = self->length; remaining > 0; remaining--, cursor++) {
        Py_UNICODE lowered = Py_UNICODE_TOLOWER(*cursor);

        if (lowered != *cursor) {
            *cursor = lowered;
        }
    }
}
int splitUnicodeString(Splitter *self,PyObject *doc) { PyObject *word ; Py_UNICODE *s; int i, inside_word=0, start=0, len; register int value, next_value; s = PyUnicode_AS_UNICODE(doc); // start of unicode string len = PyUnicode_GET_SIZE(doc); for (i=0; i<len; i++,s++) { register Py_UNICODE c; c = *s; if (self->casefolding) *s = Py_UNICODE_TOLOWER(c); value = inode_get(self, c); if (value == MISS ) { // cache miss value = Py_UNICODE_ISALNUM(c) ? IS_ALNUM : IS_TRASH; inode_set(self, c, value); } if (!inside_word) { if (value != IS_TRASH ) { start = i; inside_word = 1; } } else { if (value == IS_SEPARATOR) { register Py_UNICODE next_c = *(s+1); next_value = inode_get(self, next_c); if (next_value == MISS ) { // cache miss next_value = Py_UNICODE_ISALNUM(next_c) ? IS_ALNUM : IS_TRASH; inode_set(self, next_c, next_value); } if (next_value == IS_TRASH) { if (! (i-start<2 && ! self->single_chars)) { word = Py_BuildValue("u#", s-(i-start), min(i-start, self->max_len)); PyList_Append(self->list, word); Py_XDECREF(word); } start = i; inside_word = 0; } } else if (value==IS_TRASH) { if (! (i-start<2 && ! self->single_chars)) { word = Py_BuildValue("u#", s-(i-start), min(i-start, self->max_len)); PyList_Append(self->list, word); Py_XDECREF(word); } start = i; inside_word = 0; } } } if (inside_word) { if (! (i-start<2 && ! self->single_chars)) { word = Py_BuildValue("u#", s-(i-start), min(i-start, self->max_len)); PyList_Append(self->list, word); Py_XDECREF(word); } } return 1; }