/*
 * Splitter.split(doc [, encoding]) -- split a document into words.
 *
 * Arguments (parsed with "O|s"):
 *   doc       a bytes object or a unicode object.
 *   encoding  optional codec name, default "iso-8859-15".  Only consulted
 *             when doc is a bytes object: an empty name or "ascii" sends
 *             the raw bytes to splitString(); any other name decodes the
 *             bytes first and sends the result to splitUnicodeString().
 *
 * The previous result list held in self->list is replaced by a fresh empty
 * list before the arguments are parsed, so a failed call leaves the
 * splitter with an empty list.
 *
 * Returns a new reference to self->list, or NULL with an exception set:
 *   TypeError     doc is neither bytes nor unicode,
 *   UnicodeError  the bytes could not be decoded with `encoding`,
 *   or whatever a failed allocation / split helper left pending.
 */
static PyObject *
Splitter_split(Splitter *self, PyObject *args)
{
    PyObject *doc;
    PyObject *work;
    PyObject *fresh;
    PyObject *previous;
    char *encoding = "iso-8859-15";

    /* Allocate the replacement list first so that an allocation failure
     * is reported instead of leaving self->list NULL for the helpers. */
    fresh = PyList_New(0);
    if (fresh == NULL)
        return NULL;

    /* Install the new list before releasing the old one so self->list
     * never refers to a released object. */
    previous = self->list;
    self->list = fresh;
    Py_XDECREF(previous);

    if (!PyArg_ParseTuple(args, "O|s", &doc, &encoding))
        return NULL;

    if (PyBytes_Check(doc)) {
        if (strlen(encoding) == 0 || !strcmp(encoding, "ascii")) {
            splitString(self, doc);
        } else {
            work = PyUnicode_FromEncodedObject(doc, encoding, "strict");
            if (work == NULL) {
                PyErr_SetString(PyExc_UnicodeError,
                                "unicode conversion failed (maybe wrong encoding parameter)");
                return NULL;
            }
            splitUnicodeString(self, work);
            Py_DECREF(work);
        }
    } else if (PyUnicode_Check(doc)) {
        /* create a *real* copy since we need to modify the string */
        work = PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(doc));
        if (work == NULL)
            return NULL;
        Py_UNICODE_COPY(PyUnicode_AS_UNICODE(work),
                        PyUnicode_AS_UNICODE(doc),
                        PyUnicode_GET_SIZE(doc));
        splitUnicodeString(self, work);
        Py_DECREF(work);
    } else {
        PyErr_SetString(PyExc_TypeError,
                        "first argument must be string or unicode");
        return NULL;
    }

    /* The helpers' return types are not visible from here, so a failure
     * inside them is detected through the error indicator: a value must
     * never be returned while an exception is pending. */
    if (PyErr_Occurred())
        return NULL;

    Py_XINCREF(self->list);
    return self->list;
}
/*
 * Module-level constructor: build a Splitter and split `doc` immediately.
 *
 * Arguments (format "O|Osiiii", keyword names taken from splitter_args),
 * in positional order:
 *   doc            string or unicode object to split (required).
 *   synstop        optional object stored on the splitter; a reference is
 *                  taken when given, otherwise the field is set to NULL.
 *   encoding       codec used to decode a non-unicode doc, default "latin1".
 *   index_numbers  0 or 1, default 0.
 *   single_char    0 or 1, default 0.
 *   max_len        1..128, default 64.
 *   casefolding    0 or 1, default 1.
 *
 * Returns a new Splitter reference, or NULL with an exception set:
 *   ValueError     a numeric option is out of range,
 *   UnicodeError   doc could not be decoded with `encoding`,
 *   TypeError      doc is neither string nor unicode,
 *   or whatever a failed allocation / splitUnicodeString() left pending.
 */
static PyObject *
newSplitter(PyObject *modinfo, PyObject *args, PyObject *keywds)
{
    Splitter *self = NULL;
    PyObject *doc = NULL, *unicodedoc = NULL, *synstop = NULL;
    char *encoding = "latin1";
    int index_numbers = 0;
    int max_len = 64;
    int single_char = 0;
    int casefolding = 1;

    if (!PyArg_ParseTupleAndKeywords(args, keywds, "O|Osiiii", splitter_args,
                                     &doc, &synstop, &encoding,
                                     &index_numbers, &single_char,
                                     &max_len, &casefolding))
        return NULL;

#ifdef DEBUG
    puts("got text");
    PyObject_Print(doc, stdout, 0);
    fflush(stdout);
#endif

    /* Validate all options before any reference is acquired, so these
     * early returns have nothing to release. */
    if (index_numbers < 0 || index_numbers > 1) {
        PyErr_SetString(PyExc_ValueError, "indexnumbers must be 0 or 1");
        return NULL;
    }
    if (casefolding < 0 || casefolding > 1) {
        PyErr_SetString(PyExc_ValueError, "casefolding must be 0 or 1");
        return NULL;
    }
    if (single_char < 0 || single_char > 1) {
        PyErr_SetString(PyExc_ValueError, "singlechar must be 0 or 1");
        return NULL;
    }
    if (max_len < 1 || max_len > 128) {
        PyErr_SetString(PyExc_ValueError, "maxlen must be between 1 and 128");
        return NULL;
    }

    /* From here on unicodedoc is an owned reference in both branches. */
    if (PyString_Check(doc)) {
        unicodedoc = PyUnicode_FromEncodedObject(doc, encoding, "strict");
        if (unicodedoc == NULL) {
            PyErr_SetString(PyExc_UnicodeError,
                            "Problem converting encoded string");
            return NULL;
        }
    } else if (PyUnicode_Check(doc)) {
        unicodedoc = doc;
        Py_INCREF(unicodedoc);
    } else {
        PyErr_SetString(PyExc_TypeError,
                        "first argument is neither string nor unicode.");
        return NULL;
    }

    self = PyObject_NEW(Splitter, &SplitterType);
    if (self == NULL) {
        /* Release the document reference; returning without this leaked it. */
        Py_DECREF(unicodedoc);
        return NULL;
    }

    /* NOTE(review): PyObject_NEW does not zero the instance.  Only the
     * fields assigned below are initialised here; any other Splitter field
     * is presumably set by splitUnicodeString() -- confirm that it happens
     * before the deallocator can run on the failure path. */
    if (synstop) {
        self->synstop = synstop;
        Py_INCREF(synstop);
    } else {
        self->synstop = NULL;
    }
    self->index_numbers = index_numbers;
    self->max_len = max_len;
    self->allow_single_chars = single_char;
    self->casefolding = casefolding;

    if (splitUnicodeString(self, (PyUnicodeObject *)unicodedoc) < 0) {
        /* Splitting failed: drop the half-built splitter and return NULL. */
        Py_DECREF(self);
        self = NULL;
    }

    Py_DECREF(unicodedoc);
    return (PyObject *)self;
}