예제 #1
0
/*
 * Unicode-aware variant of text_check_identifier_nodigit().
 *
 * Returns 1 when `ch` is below 255 and text_check_identifier_nodigit()
 * accepts it (after narrowing to char), or when Py_UNICODE_ISALPHA()
 * reports it as alphabetic; returns 0 otherwise.
 */
int text_check_identifier_nodigit_unicode(const unsigned int ch)
{
	/* Small code points are first offered to the byte-oriented check. */
	if (ch < 255 && text_check_identifier_nodigit((char)ch)) {
		return 1;
	}

	/* Everything else (or anything the byte check rejected) falls back to
	 * the Unicode alphabetic test. */
	if (Py_UNICODE_ISALPHA(ch)) {
		return 1;
	}

	return 0;
}
예제 #2
0
/*
 * Cut the word doc1[start:end] (capped at self->max_len characters), pass it
 * through checkSynword() and append the result to self->list unless that
 * result is Py_None.
 *
 * Returns 0 on success and -1 if slicing, the lookup or the append failed.
 */
static int splitUnicodeString_addWord(Splitter *self, PyUnicodeObject *doc1,
                                      int start, int end)
{
    PyObject *word, *synword;
    int status = 0;

    word = PySequence_GetSlice((PyObject *)doc1, start,
                               min(end, start + self->max_len));
    if (word == NULL)
        return -1;

    /* NOTE(review): reference ownership of checkSynword()'s result is not
     * visible from here; it is handled exactly as before (no DECREF). */
    synword = checkSynword(self, word);
    if (synword == NULL) {
        status = -1;
    } else if (synword != Py_None) {
        /* PyList_Append can fail (e.g. out of memory); do not swallow it. */
        if (PyList_Append(self->list, synword) < 0)
            status = -1;
    }

#ifdef DEBUG
    PyObject_Print(word, stdout, 0);
    fflush(stdout);
#endif

    Py_DECREF(word);
    return status;
}

/*
 * Split `doc` into words and store them in a fresh list at self->list.
 *
 * The input is first run through prepareString(); the returned copy is then
 * scanned character by character.  A word starts at an alphanumeric character
 * when self->index_numbers is set, otherwise at an alphabetic one, and
 * continues across alphanumerics and the characters '/', '_' and '-'.
 * Each word is truncated to self->max_len characters and filtered through
 * checkSynword(); results equal to Py_None are dropped.
 *
 * Returns 1 on success and -1 on failure.  On failure self->list may hold a
 * partially filled list, or NULL if the list itself could not be created.
 */
static int splitUnicodeString(Splitter *self,PyUnicodeObject *doc)
{
    PyUnicodeObject *doc1;
    Py_UNICODE *s;
    int len;
    int inside_word = 0;
    int start = 0;
    int i;

    doc1 = prepareString(self, doc);
    if (doc1 == NULL)
        return -1;

    /* Scan and slice the prepared copy, so bound the loop by ITS length
     * rather than by the length of the original document. */
    s = doc1->str;
    len = doc1->length;

    self->list = PyList_New(0);
    if (self->list == NULL)
        goto err;

    for (i = 0; i < len; s++, i++) {
        Py_UNICODE ch = *s;

        if (!inside_word) {
            int starts_word;

            /* Digits may only open a word when number indexing is enabled. */
            if (self->index_numbers)
                starts_word = Py_UNICODE_ISALNUM(ch);
            else
                starts_word = Py_UNICODE_ISALPHA(ch);

            if (starts_word) {
                inside_word = 1;
                start = i;
            }
        } else if (!(Py_UNICODE_ISALNUM(ch) || ch=='/' || ch=='_' || ch=='-')) {
            /* First character that cannot be part of a word: emit [start, i). */
            inside_word = 0;

            if (splitUnicodeString_addWord(self, doc1, start, i) < 0)
                goto err;

            start = 0;
        }
    }

    /* The document ended in the middle of a word: emit the tail. */
    if (inside_word) {
        if (splitUnicodeString_addWord(self, doc1, start, len) < 0)
            goto err;
    }

#ifdef DEBUG
    PyObject_Print(self->list,stdout,0);
    fflush(stdout);
#endif

    Py_DECREF(doc1);
    return 1;

 err:
    Py_DECREF(doc1);
    return -1;
}