/*
 * Decide whether the code point `ch` counts as a non-digit identifier
 * character.
 *
 * Returns 1 when either
 *   - ch is below 255 and text_check_identifier_nodigit() accepts it after
 *     narrowing to char, or
 *   - Py_UNICODE_ISALPHA() reports ch as alphabetic;
 * otherwise returns 0.
 *
 * NOTE(review): for ch in 128..254 the (char) cast yields a negative value
 * on platforms where plain char is signed -- confirm the narrow helper
 * copes with that.
 */
int text_check_identifier_nodigit_unicode(const unsigned int ch)
{
	/* Narrow-character fast path; the helper is only consulted below 255. */
	if (ch < 255 && text_check_identifier_nodigit((char)ch)) {
		return 1;
	}

	/* Everything else falls back to the Unicode alphabetic test. */
	return Py_UNICODE_ISALPHA(ch) ? 1 : 0;
}
/*
 * Helper for splitUnicodeString(): take the slice doc1[start:end], run it
 * through checkSynword() and append the result to self->list unless
 * checkSynword() returned Py_None.
 *
 * Returns 0 on success and -1 when slicing or appending failed. The
 * temporary slice is always released before returning.
 */
static int splitUnicodeString_emit(Splitter *self, PyUnicodeObject *doc1,
                                   int start, int end)
{
    PyObject *word;
    PyObject *synword;
    int rc = 0;

    word = PySequence_GetSlice((PyObject *)doc1, start, end);
    if (word == NULL)
        return -1;

    /*
     * NOTE(review): the ownership of the reference returned by
     * checkSynword() is not visible from here; as before, it is not
     * released by this code -- confirm against checkSynword().
     */
    synword = checkSynword(self, word);
    if (synword != Py_None && PyList_Append(self->list, synword) < 0)
        rc = -1;    /* append failed: report it instead of ignoring it */

#ifdef DEBUG
    PyObject_Print(word, stdout, 0);
    fflush(stdout);
#endif

    Py_DECREF(word);
    return rc;
}

/*
 * Split a unicode document into words and store them in self->list.
 *
 * The document is first passed through prepareString(); the resulting
 * string is then scanned character by character:
 *   - a word starts at an alphanumeric character when self->index_numbers
 *     is set, otherwise at an alphabetic character;
 *   - a word continues over alphanumeric characters and '/', '_' and '-';
 *   - each word is truncated to at most self->max_len characters, filtered
 *     through checkSynword(), and appended to self->list unless
 *     checkSynword() returned Py_None.
 *
 * self->list is replaced by a freshly created list.
 *
 * Returns 1 on success and -1 on failure (prepareString(), list creation,
 * slicing or appending failed).
 */
static int splitUnicodeString(Splitter *self, PyUnicodeObject *doc)
{
    PyUnicodeObject *doc1;
    Py_UNICODE *s;
    int len;
    int inside_word = 0;
    int start = 0;
    int i;

    doc1 = prepareString(self, doc);
    if (doc1 == NULL)
        return -1;

    /*
     * Scan and slice the prepared string, so its own length is the bound;
     * using the input's length would be wrong if the two ever differ.
     */
    s = doc1->str;
    len = doc1->length;

    self->list = PyList_New(0);
    if (self->list == NULL)
        goto err;

    for (i = 0; i < len; s++, i++) {
        Py_UNICODE ch = *s;

        if (!inside_word) {
            int starts_word = self->index_numbers ? Py_UNICODE_ISALNUM(ch)
                                                  : Py_UNICODE_ISALPHA(ch);
            if (starts_word) {
                inside_word = 1;
                start = i;
            }
        } else if (!(Py_UNICODE_ISALNUM(ch) ||
                     ch == '/' || ch == '_' || ch == '-')) {
            /* First character that cannot continue the word ends it. */
            inside_word = 0;
            if (splitUnicodeString_emit(self, doc1, start,
                                        min(i, start + self->max_len)) < 0)
                goto err;
        }
    }

    /* A word that runs to the end of the string still has to be emitted. */
    if (inside_word &&
        splitUnicodeString_emit(self, doc1, start,
                                min(len, start + self->max_len)) < 0)
        goto err;

#ifdef DEBUG
    PyObject_Print(self->list, stdout, 0);
    fflush(stdout);
#endif

    Py_DECREF(doc1);
    return 1;

err:
    Py_DECREF(doc1);
    return -1;
}