// BreakIterator.split {{{ static PyObject * icu_BreakIterator_split(icu_BreakIterator *self, PyObject *args, PyObject *kwargs) { int32_t prev = 0, p = 0, sz = 0; PyObject *ans = NULL, *token = NULL; ans = PyList_New(0); if (ans == NULL) return PyErr_NoMemory(); p = ubrk_first(self->break_iterator); while (p != UBRK_DONE) { prev = p; p = ubrk_next(self->break_iterator); if (self->type == UBRK_WORD && ubrk_getRuleStatus(self->break_iterator) == UBRK_WORD_NONE) continue; // We are not at the start of a word sz = (p == UBRK_DONE) ? self->text_len - prev : p - prev; if (sz > 0) { token = icu_to_python(self->text + prev, sz); if (token == NULL) { Py_DECREF(ans); ans = NULL; break; } if (PyList_Append(ans, token) != 0) { Py_DECREF(token); Py_DECREF(ans); ans = NULL; break; } Py_DECREF(token); } } return ans; } // }}}
// Return a copy of `input` in which every lowercase code point is replaced by
// its uppercase mapping and every uppercase code point by its lowercase
// mapping (simple, per-code-point mappings via u_toupper/u_tolower).
//
// The text is converted to UTF-32 so that each code point can be examined
// individually, then converted back to UTF-16 for icu_to_python.
//
// Returns a new Python object, or NULL on failure. MemoryError is raised when
// a buffer cannot be allocated and ValueError (carrying the ICU error name)
// when a conversion fails. NOTE(review): python_to_icu is assumed to set a
// Python exception itself when it returns NULL -- confirm.
static PyObject* icu_swap_case(PyObject *self, PyObject *input) {
    PyObject *result = NULL;
    UErrorCode status = U_ZERO_ERROR;
    UChar *input_buf = NULL, *output_buf = NULL;
    UChar32 *buf = NULL;
    int32_t sz = 0, sz32 = 0, i = 0;
    size_t units = 0;

    input_buf = python_to_icu(input, &sz);
    if (input_buf == NULL) goto end;

    // calloc(0, n) is allowed to return NULL, which would be misreported as
    // an out-of-memory condition for empty input; always request >= 1 unit.
    units = (sz > 0) ? (size_t)sz : 1;
    output_buf = (UChar*) calloc(3 * units, sizeof(UChar));
    buf = (UChar32*) calloc(2 * units, sizeof(UChar32));
    if (output_buf == NULL || buf == NULL) { PyErr_NoMemory(); goto end; }

    u_strToUTF32(buf, 2 * sz, &sz32, input_buf, sz, &status);
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); goto end; }

    for (i = 0; i < sz32; i++) {
        if (u_islower(buf[i])) buf[i] = u_toupper(buf[i]);
        else if (u_isupper(buf[i])) buf[i] = u_tolower(buf[i]);
    }

    u_strFromUTF32(output_buf, 3 * sz, &sz, buf, sz32, &status);
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); goto end; }

    result = icu_to_python(output_buf, sz);

end:
    // free(NULL) is a no-op, so no guards are needed.
    free(input_buf);
    free(output_buf);
    free(buf);
    return result;
} // }}}
// Normalize a string with one of ICU's Normalizer2 instances.
//
// args is a tuple (mode, src): mode is an integer equal to one of NFC, NFKC,
// NFD or NFKD and src is the object handed to python_to_icu.
//
// Returns a new Python object with the normalized text, or NULL on failure.
// ValueError is raised for an unknown mode or when ICU reports an error,
// MemoryError when a buffer cannot be (re)allocated. NOTE(review):
// python_to_icu is assumed to set a Python exception when it returns NULL.
static PyObject *
icu_normalize(PyObject *self, PyObject *args) {
    UErrorCode status = U_ZERO_ERROR;
    int32_t sz = 0, cap = 0, rsz = 0;
    int mode = 0;   // parsed with "i", so it must really be an int
    const UNormalizer2 *n = NULL;
    UChar *dest = NULL, *source = NULL, *grown = NULL;
    PyObject *ret = NULL, *src = NULL;

    if (!PyArg_ParseTuple(args, "iO", &mode, &src)) return NULL;

    switch (mode) {
        case NFC:
            n = unorm2_getNFCInstance(&status);
            break;
        case NFKC:
            n = unorm2_getNFKCInstance(&status);
            break;
        case NFD:
            n = unorm2_getNFDInstance(&status);
            break;
        case NFKD:
            n = unorm2_getNFKDInstance(&status);
            break;
        default:
            // Without this, a NULL normalizer would be passed on to ICU.
            PyErr_SetString(PyExc_ValueError, "Unknown normalization mode");
            goto end;
    }
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); goto end; }

    source = python_to_icu(src, &sz);
    if (source == NULL) goto end;

    // Never ask calloc for zero elements: it may return NULL for that.
    cap = (sz > 0) ? 2 * sz : 1;
    dest = (UChar*) calloc((size_t)cap, sizeof(UChar));
    if (dest == NULL) { PyErr_NoMemory(); goto end; }

    for (;;) {
        // ICU functions return immediately when entered with a failure
        // status, so it has to be cleared before every (re)try.
        status = U_ZERO_ERROR;
        rsz = unorm2_normalize(n, source, sz, dest, cap, &status);
        if (status != U_BUFFER_OVERFLOW_ERROR) break;

        // On overflow rsz is the length that is required.
        cap = (rsz > cap) ? rsz : 2 * cap;
        grown = (UChar*) realloc(dest, (size_t)cap * sizeof(UChar));
        if (grown == NULL) { PyErr_NoMemory(); goto end; }  // dest is freed below
        dest = grown;
    }
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); goto end; }

    ret = icu_to_python(dest, rsz);

end:
    free(source);
    free(dest);
    return ret;
} // }}}
// chr {{{ static PyObject * icu_chr(PyObject *self, PyObject *args) { UErrorCode status = U_ZERO_ERROR; UChar32 code = 0; UChar buf[5] = {0}; int32_t sz = 0; if (!PyArg_ParseTuple(args, "I", &code)) return NULL; u_strFromUTF32(buf, 4, &sz, &code, 1, &status); if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, "arg not in range(0x110000)"); return NULL; } return icu_to_python(buf, sz); } // }}}
// roundtrip {{{ static PyObject * icu_roundtrip(PyObject *self, PyObject *src) { int32_t sz = 0; UChar *icu = NULL; PyObject *ret = NULL; icu = python_to_icu(src, &sz); if (icu != NULL) { ret = icu_to_python(icu, sz); free(icu); } return ret; } // }}}
// roundtrip {{{ static PyObject * icu_roundtrip(PyObject *self, PyObject *args) { int32_t sz = 0; UChar *icu = NULL; PyObject *ret = NULL, *src = NULL; if (!PyArg_ParseTuple(args, "O", &src)) return NULL; icu = python_to_icu(src, &sz, 1); if (icu != NULL) { ret = icu_to_python(icu, sz); free(icu); } return ret; } // }}}
// Collator.display_name {{{ static PyObject * icu_Collator_display_name(icu_Collator *self, void *closure) { const char *loc = NULL; UErrorCode status = U_ZERO_ERROR; UChar dname[400]; int32_t sz = 0; loc = ucol_getLocaleByType(self->collator, ULOC_ACTUAL_LOCALE, &status); if (loc == NULL) { PyErr_SetString(PyExc_Exception, "Failed to get actual locale"); return NULL; } sz = ucol_getDisplayName(loc, "en", dname, sizeof(dname), &status); if (U_FAILURE(status)) {PyErr_SetString(PyExc_ValueError, u_errorName(status)); return NULL; } return icu_to_python(dname, sz); }
// Collator.contractions {{{ static PyObject * icu_Collator_contractions(icu_Collator *self, PyObject *args, PyObject *kwargs) { UErrorCode status = U_ZERO_ERROR; UChar *str = NULL; UChar32 start=0, end=0; int32_t count = 0, len = 0, i; PyObject *ans = Py_None, *pbuf; if (self->contractions == NULL) { self->contractions = uset_open(1, 0); if (self->contractions == NULL) return PyErr_NoMemory(); self->contractions = ucol_getTailoredSet(self->collator, &status); } status = U_ZERO_ERROR; count = uset_getItemCount(self->contractions); str = (UChar*)calloc(100, sizeof(UChar)); if (str == NULL) { PyErr_NoMemory(); goto end; } ans = PyTuple_New(count); if (ans == NULL) { goto end; } for (i = 0; i < count; i++) { len = uset_getItem(self->contractions, i, &start, &end, str, 1000, &status); if (len >= 2) { // We have a string status = U_ZERO_ERROR; pbuf = icu_to_python(str, len); if (pbuf == NULL) { Py_DECREF(ans); ans = NULL; goto end; } PyTuple_SetItem(ans, i, pbuf); } else { // Ranges dont make sense for contractions, ignore them PyTuple_SetItem(ans, i, Py_None); Py_INCREF(Py_None); } } end: if (str != NULL) free(str); return ans; } // }}}
// Change the case of a string using ICU's locale-aware case mappings.
//
// args is a tuple (input, which, locale): `which` selects TITLE_CASE or
// UPPER_CASE, and any other value selects lowercasing; `locale` is a string
// or None.
//
// Returns a new Python object from icu_to_python, or NULL on failure:
// NotImplementedError when locale is None, MemoryError when the output buffer
// cannot be allocated, ValueError (carrying the ICU error name) when ICU
// reports an error. NOTE(review): python_to_icu is assumed to set a Python
// exception itself when it returns NULL -- confirm.
static PyObject* icu_change_case(PyObject *self, PyObject *args) {
    char *locale = NULL;
    PyObject *input = NULL, *result = NULL;
    int which = UPPER_CASE;
    UErrorCode status = U_ZERO_ERROR;
    UChar *input_buf = NULL, *output_buf = NULL;
    int32_t sz = 0;
    size_t units = 0;

    if (!PyArg_ParseTuple(args, "Oiz", &input, &which, &locale)) return NULL;
    if (locale == NULL) {
        // We deliberately use NotImplementedError so that this error can be unambiguously identified
        PyErr_SetString(PyExc_NotImplementedError, "You must specify a locale");
        return NULL;
    }

    input_buf = python_to_icu(input, &sz, 1);
    if (input_buf == NULL) goto end;

    // calloc(0, n) is allowed to return NULL, which would be misreported as
    // an out-of-memory condition for empty input; always request >= 1 unit.
    units = (sz > 0) ? (size_t)sz : 1;
    output_buf = (UChar*) calloc(3 * units, sizeof(UChar));
    if (output_buf == NULL) { PyErr_NoMemory(); goto end; }

    switch (which) {
        case TITLE_CASE:
            sz = u_strToTitle(output_buf, 3 * sz, input_buf, sz, NULL, locale, &status);
            break;
        case UPPER_CASE:
            sz = u_strToUpper(output_buf, 3 * sz, input_buf, sz, locale, &status);
            break;
        default:
            sz = u_strToLower(output_buf, 3 * sz, input_buf, sz, locale, &status);
    }
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); goto end; }

    result = icu_to_python(output_buf, sz);

end:
    // free(NULL) is a no-op, so no guards are needed.
    free(input_buf);
    free(output_buf);
    return result;
} // }}}
// normalize {{{ static PyObject * icu_normalize(PyObject *self, PyObject *args) { UErrorCode status = U_ZERO_ERROR; int32_t sz = 0, mode = UNORM_DEFAULT, cap = 0, rsz = 0; UChar *dest = NULL, *source = NULL; PyObject *ret = NULL, *src = NULL; if (!PyArg_ParseTuple(args, "iO", &mode, &src)) return NULL; source = python_to_icu(src, &sz, 1); if (source == NULL) goto end; cap = 2 * sz; dest = (UChar*) calloc(cap, sizeof(UChar)); if (dest == NULL) { PyErr_NoMemory(); goto end; } while (1) { rsz = unorm_normalize(source, sz, (UNormalizationMode)mode, 0, dest, cap, &status); if (status == U_BUFFER_OVERFLOW_ERROR) { cap *= 2; dest = (UChar*) realloc(dest, cap*sizeof(UChar)); if (dest == NULL) { PyErr_NoMemory(); goto end; } continue; } break; } if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); goto end; } ret = icu_to_python(dest, rsz); end: if (source != NULL) free(source); if (dest != NULL) free(dest); return ret; } // }}}