Exemplo n.º 1
0
/*
 * Lower-case the character buffer of `self` in place.
 *
 * Walks self->length elements starting at self->str, maps each one through
 * Py_UNICODE_TOLOWER, and stores the result back only when it differs from
 * the element already there.
 */
static
void fixlower(PyUnicodeObject *self)
{
    Py_UNICODE *cursor = self->str;
    int remaining;

    for (remaining = self->length; remaining > 0; remaining--, cursor++) {
        const Py_UNICODE lowered = Py_UNICODE_TOLOWER(*cursor);

        // skip the store when the element is already in lower-case form
        if (lowered != *cursor) {
            *cursor = lowered;
        }
    }
}
Exemplo n.º 2
0
int splitUnicodeString(Splitter *self,PyObject *doc)
{
    PyObject *word ;
    Py_UNICODE *s;
    int i, inside_word=0, start=0, len;
    register int value, next_value;

    s = PyUnicode_AS_UNICODE(doc);       // start of unicode string
    len = PyUnicode_GET_SIZE(doc);


    for (i=0; i<len; i++,s++) {
        register Py_UNICODE c;

        c = *s;

        if (self->casefolding)
            *s = Py_UNICODE_TOLOWER(c);

        value = inode_get(self, c);

        if (value == MISS ) {
            // cache miss

            value = Py_UNICODE_ISALNUM(c) ? IS_ALNUM : IS_TRASH;
            inode_set(self, c, value);
        }

        if (!inside_word) {
            if (value != IS_TRASH ) {
                start = i;
                inside_word = 1;
            }
        } else {

            if (value == IS_SEPARATOR) {
                register Py_UNICODE next_c = *(s+1);

                next_value = inode_get(self, next_c);

                if (next_value == MISS ) {
                    // cache miss

                    next_value = Py_UNICODE_ISALNUM(next_c) ? IS_ALNUM : IS_TRASH;
                    inode_set(self, next_c, next_value);
                }

                if (next_value == IS_TRASH) {
                    if (! (i-start<2 && ! self->single_chars)) {
                        word = Py_BuildValue("u#", s-(i-start), min(i-start, self->max_len));
                        PyList_Append(self->list, word);
                        Py_XDECREF(word);
                    }
                    start = i;
                    inside_word = 0;
                }

            }

            else if (value==IS_TRASH) {
                if (! (i-start<2 && ! self->single_chars)) {
                    word = Py_BuildValue("u#", s-(i-start), min(i-start, self->max_len));
                    PyList_Append(self->list, word);
                    Py_XDECREF(word);
                }
                start = i;
                inside_word = 0;
            }
        }
    }

    if (inside_word) {
        if (! (i-start<2 && ! self->single_chars)) {
            word = Py_BuildValue("u#", s-(i-start), min(i-start, self->max_len));
            PyList_Append(self->list, word);
            Py_XDECREF(word);
        }
    }

    return 1;
}