Exemple #1
0
// BreakIterator.split {{{
/*
 * BreakIterator.split() -> list
 *
 * Steps through every boundary reported by self->break_iterator and collects
 * the text between consecutive boundaries as Python objects (built with
 * icu_to_python()). For word iterators, segments whose rule status is
 * UBRK_WORD_NONE are left out. Segments with a non-positive length are
 * skipped as well.
 *
 * Returns a new list, or NULL with a Python exception set on failure.
 * args and kwargs are not used.
 */
static PyObject *
icu_BreakIterator_split(icu_BreakIterator *self, PyObject *args, PyObject *kwargs) {
    PyObject *pieces = NULL, *piece = NULL;
    int32_t seg_start = 0, seg_end = 0, seg_len = 0;
    int append_failed = 0;

    pieces = PyList_New(0);
    if (pieces == NULL) return PyErr_NoMemory();

    for (seg_end = ubrk_first(self->break_iterator); seg_end != UBRK_DONE; ) {
        seg_start = seg_end;
        seg_end = ubrk_next(self->break_iterator);

        // We are not at the start of a word
        if (self->type == UBRK_WORD && ubrk_getRuleStatus(self->break_iterator) == UBRK_WORD_NONE)
            continue;

        // The final segment runs up to the end of the text
        if (seg_end == UBRK_DONE) seg_len = self->text_len - seg_start;
        else seg_len = seg_end - seg_start;
        if (seg_len <= 0) continue;

        piece = icu_to_python(self->text + seg_start, seg_len);
        if (piece == NULL) {
            Py_DECREF(pieces);
            return NULL;
        }

        // PyList_Append takes its own reference, so ours is always dropped
        append_failed = (PyList_Append(pieces, piece) != 0);
        Py_DECREF(piece);
        if (append_failed) {
            Py_DECREF(pieces);
            return NULL;
        }
    }

    return pieces;

} // }}}
Exemple #2
0
/*
 * swap_case(input)
 *
 * Converts input to UTF-16 with python_to_icu(), expands it to UTF-32,
 * maps every lowercase code point to uppercase and every uppercase code
 * point to lowercase, then converts back to UTF-16 and hands the result to
 * icu_to_python().
 *
 * Returns the converted object, or NULL with a Python exception set.
 */
static PyObject* icu_swap_case(PyObject *self, PyObject *input) {
    UErrorCode err = U_ZERO_ERROR;
    UChar *units_in = NULL, *units_out = NULL;
    UChar32 *points = NULL;
    int32_t n_units = 0, n_points = 0, out_cap = 0, idx = 0;
    PyObject *swapped = NULL;

    units_in = python_to_icu(input, &n_units);
    if (units_in == NULL) goto end;

    out_cap = 3 * n_units;
    units_out = (UChar*) calloc(out_cap, sizeof(UChar));
    points = (UChar32*) calloc(2 * n_units, sizeof(UChar32));
    if (units_out == NULL || points == NULL) { PyErr_NoMemory(); goto end; }

    // A failure here is picked up by the U_FAILURE() check further down
    u_strToUTF32(points, 2 * n_units, &n_points, units_in, n_units, &err);

    for (idx = 0; idx < n_points; idx++) {
        UChar32 ch = points[idx];
        if (u_islower(ch)) points[idx] = u_toupper(ch);
        else if (u_isupper(ch)) points[idx] = u_tolower(ch);
    }

    u_strFromUTF32(units_out, out_cap, &n_units, points, n_points, &err);
    if (U_FAILURE(err)) {
        PyErr_SetString(PyExc_ValueError, u_errorName(err));
        goto end;
    }
    swapped = icu_to_python(units_out, n_units);

end:
    // free(NULL) is a no-op, so no guards are needed
    free(units_in);
    free(units_out);
    free(points);
    return swapped;

} // }}}
Exemple #3
0
/*
 * normalize(mode, src)
 *
 * Normalizes src with the ICU normalizer selected by mode (one of NFC,
 * NFKC, NFD, NFKD). src is converted with python_to_icu() and the result is
 * returned through icu_to_python().
 *
 * Returns NULL with a Python exception set on failure:
 *   ValueError  - unknown mode, or an ICU error (message is the ICU error name)
 *   MemoryError - a buffer could not be allocated
 */
static PyObject *
icu_normalize(PyObject *self, PyObject *args) {
    UErrorCode status = U_ZERO_ERROR;
    int32_t sz = 0, cap = 0, rsz = 0;
    // Parsed with the "i" format, which writes an int; an enum object is not
    // guaranteed to have the same size.
    int mode = 0;
    UChar *dest = NULL, *source = NULL, *grown = NULL;
    PyObject *ret = NULL, *src = NULL;
    const UNormalizer2 *n = NULL;

    if (!PyArg_ParseTuple(args, "iO", &mode, &src)) return NULL;
    switch (mode) {
        case NFC:
            n = unorm2_getNFCInstance(&status);
            break;
        case NFKC:
            n = unorm2_getNFKCInstance(&status);
            break;
        case NFD:
            n = unorm2_getNFDInstance(&status);
            break;
        case NFKD:
            n = unorm2_getNFKDInstance(&status);
            break;
        default:
            // Without this, n would stay NULL and be handed to unorm2_normalize()
            PyErr_SetString(PyExc_ValueError, "Unknown normalization mode");
            return NULL;
    }
    if (U_FAILURE(status)) {
        PyErr_SetString(PyExc_ValueError, u_errorName(status));
        goto end;
    }

    source = python_to_icu(src, &sz);
    if (source == NULL) goto end;
    // +1 keeps the capacity non-zero for empty input: calloc(0, ...) may
    // legitimately return NULL, and doubling zero would never grow.
    cap = 2 * sz + 1;
    dest = (UChar*) calloc(cap, sizeof(UChar));
    if (dest == NULL) { PyErr_NoMemory(); goto end; }

    while (1) {
        rsz = unorm2_normalize(n, source, sz, dest, cap, &status);
        if (status == U_BUFFER_OVERFLOW_ERROR) {
            cap *= 2;
            // Keep the old pointer until realloc succeeds so it is still
            // freed at end: on failure.
            grown = (UChar*) realloc(dest, cap*sizeof(UChar));
            if (grown == NULL) { PyErr_NoMemory(); goto end; }
            dest = grown;
            // ICU functions return immediately when handed a failing status,
            // so it must be cleared before retrying.
            status = U_ZERO_ERROR;
            continue;
        }
        break;
    }

    if (U_FAILURE(status)) {
        PyErr_SetString(PyExc_ValueError, u_errorName(status));
        goto end;
    }

    ret = icu_to_python(dest, rsz);

end:
    free(source);
    free(dest);
    return ret;
} // }}}
Exemple #4
0
// chr {{{
/*
 * chr(code)
 *
 * Encodes the single code point `code` as UTF-16 with u_strFromUTF32() and
 * returns it through icu_to_python(). Raises ValueError when ICU rejects the
 * code point.
 */
static PyObject *
icu_chr(PyObject *self, PyObject *args) {
    UChar utf16[5] = {0};
    UChar32 codepoint = 0;
    int32_t utf16_len = 0;
    UErrorCode err = U_ZERO_ERROR;

    if (!PyArg_ParseTuple(args, "I", &codepoint)) return NULL;

    u_strFromUTF32(utf16, 4, &utf16_len, &codepoint, 1, &err);
    if (!U_FAILURE(err)) return icu_to_python(utf16, utf16_len);

    PyErr_SetString(PyExc_ValueError, "arg not in range(0x110000)");
    return NULL;
} // }}}
Exemple #5
0
// roundtrip {{{
/*
 * roundtrip(src)
 *
 * Passes src through python_to_icu() and straight back through
 * icu_to_python(). Returns NULL when the first conversion fails.
 */
static PyObject *
icu_roundtrip(PyObject *self, PyObject *src) {
    PyObject *converted = NULL;
    UChar *units = NULL;
    int32_t n_units = 0;

    units = python_to_icu(src, &n_units);
    if (units == NULL) return NULL;

    converted = icu_to_python(units, n_units);
    free(units);
    return converted;
} // }}}
Exemple #6
0
// roundtrip {{{
/*
 * roundtrip(src)
 *
 * Unpacks the single argument, passes it through python_to_icu() and
 * straight back through icu_to_python(). Returns NULL when argument parsing
 * or the first conversion fails.
 */
static PyObject *
icu_roundtrip(PyObject *self, PyObject *args) {
    PyObject *src = NULL, *converted = NULL;
    UChar *units = NULL;
    int32_t n_units = 0;

    if (!PyArg_ParseTuple(args, "O", &src)) return NULL;

    units = python_to_icu(src, &n_units, 1);
    if (units == NULL) return NULL;

    converted = icu_to_python(units, n_units);
    free(units);
    return converted;
} // }}}
Exemple #7
0
// Collator.display_name {{{
/*
 * Collator.display_name
 *
 * Looks up the collator's actual locale and returns its display name,
 * rendered for the "en" display locale, through icu_to_python().
 *
 * Returns NULL with a Python exception set on failure:
 *   Exception  - the actual locale could not be determined
 *   ValueError - ICU reported an error (message is the ICU error name)
 *
 * closure is not used.
 */
static PyObject *
icu_Collator_display_name(icu_Collator *self, void *closure) {
    const char *loc = NULL;
    UErrorCode status = U_ZERO_ERROR;
    UChar dname[400];
    int32_t sz = 0;

    loc = ucol_getLocaleByType(self->collator, ULOC_ACTUAL_LOCALE, &status);
    if (loc == NULL) {
        PyErr_SetString(PyExc_Exception, "Failed to get actual locale"); return NULL;
    }
    // The capacity is counted in UChars, not bytes: passing sizeof(dname)
    // would let ICU write past the end of the array.
    sz = ucol_getDisplayName(loc, "en", dname, (int32_t)(sizeof(dname) / sizeof(dname[0])), &status);
    if (U_FAILURE(status)) {PyErr_SetString(PyExc_ValueError, u_errorName(status)); return NULL; }

    return icu_to_python(dname, sz);
}
Exemple #8
0
// Collator.contractions {{{
/*
 * Collator.contractions() -> tuple
 *
 * Returns a tuple with one entry per item of the collator's tailored set.
 * The set is fetched with ucol_getTailoredSet() on first use and cached in
 * self->contractions. String items of length >= 2 are converted with
 * icu_to_python(); every other item (ranges, shorter strings) is stored as
 * None.
 *
 * Returns NULL with a Python exception set on failure.
 * args and kwargs are not used.
 */
static PyObject *
icu_Collator_contractions(icu_Collator *self, PyObject *args, PyObject *kwargs) {
    UErrorCode status = U_ZERO_ERROR;
    UChar *str = NULL, *grown = NULL;
    UChar32 start=0, end=0;
    int32_t count = 0, len = 0, cap = 100, i;
    // Starts as NULL so that every failure path returns NULL rather than a
    // borrowed Py_None with an exception set.
    PyObject *ans = NULL, *pbuf;

    if (self->contractions == NULL) {
        // ucol_getTailoredSet() hands back a set of its own; opening an empty
        // set first and then overwriting the pointer would leak it.
        self->contractions = ucol_getTailoredSet(self->collator, &status);
        if (self->contractions == NULL) return PyErr_NoMemory();
    }
    count = uset_getItemCount(self->contractions);

    str = (UChar*)calloc(cap, sizeof(UChar));
    if (str == NULL) { PyErr_NoMemory(); goto end; }
    ans = PyTuple_New(count);
    if (ans == NULL) { goto end; }

    for (i = 0; i < count; i++) {
        status = U_ZERO_ERROR;
        // The capacity passed must match the real size of str
        len = uset_getItem(self->contractions, i, &start, &end, str, cap, &status);
        if (len > cap) {
            // The string did not fit; a length above the capacity is the
            // size ICU needs, so grow the buffer and fetch the item again.
            grown = (UChar*)realloc(str, (size_t)len * sizeof(UChar));
            if (grown == NULL) { PyErr_NoMemory(); Py_DECREF(ans); ans = NULL; goto end; }
            str = grown;
            cap = len;
            status = U_ZERO_ERROR;
            len = uset_getItem(self->contractions, i, &start, &end, str, cap, &status);
        }
        if (len >= 2 && len <= cap) {
            // We have a string
            pbuf = icu_to_python(str, len);
            if (pbuf == NULL) { Py_DECREF(ans); ans = NULL; goto end; }
            PyTuple_SetItem(ans, i, pbuf);
        } else {
            // Ranges dont make sense for contractions, ignore them.
            // PyTuple_SetItem steals a reference, so take it first.
            Py_INCREF(Py_None);
            PyTuple_SetItem(ans, i, Py_None);
        }
    }
end:
    free(str);

    return ans;
} // }}}
Exemple #9
0
/*
 * change_case(input, which, locale)
 *
 * Converts input with python_to_icu() and applies a locale-aware case
 * mapping: title case for TITLE_CASE, upper case for UPPER_CASE and lower
 * case for any other value of `which`. The output buffer is sized at three
 * times the input length.
 *
 * Returns the mapped text through icu_to_python(), or NULL with a Python
 * exception set (NotImplementedError when locale is None, ValueError on an
 * ICU error, MemoryError when the output buffer cannot be allocated).
 */
static PyObject* icu_change_case(PyObject *self, PyObject *args) {
    UErrorCode err = U_ZERO_ERROR;
    PyObject *input = NULL, *converted = NULL;
    UChar *src_units = NULL, *dst_units = NULL;
    char *locale = NULL;
    int which = UPPER_CASE;
    int32_t src_len = 0, dst_cap = 0, dst_len = 0;

    if (!PyArg_ParseTuple(args, "Oiz", &input, &which, &locale)) return NULL;
    if (locale == NULL) {
        // We deliberately use NotImplementedError so that this error can be unambiguously identified
        PyErr_SetString(PyExc_NotImplementedError, "You must specify a locale");
        return NULL;
    }

    src_units = python_to_icu(input, &src_len, 1);
    if (src_units == NULL) goto end;

    dst_cap = 3 * src_len;
    dst_units = (UChar*) calloc(dst_cap, sizeof(UChar));
    if (dst_units == NULL) { PyErr_NoMemory(); goto end; }

    if (which == TITLE_CASE) {
        dst_len = u_strToTitle(dst_units, dst_cap, src_units, src_len, NULL, locale, &err);
    } else if (which == UPPER_CASE) {
        dst_len = u_strToUpper(dst_units, dst_cap, src_units, src_len, locale, &err);
    } else {
        // Anything else is treated as a request for lower case
        dst_len = u_strToLower(dst_units, dst_cap, src_units, src_len, locale, &err);
    }

    if (U_FAILURE(err)) {
        PyErr_SetString(PyExc_ValueError, u_errorName(err));
        goto end;
    }
    converted = icu_to_python(dst_units, dst_len);

end:
    // free(NULL) is a no-op, so no guards are needed
    free(src_units);
    free(dst_units);
    return converted;

} // }}}
Exemple #10
0
// normalize {{{
/*
 * normalize(mode, src)
 *
 * Normalizes src with unorm_normalize() using the given UNormalizationMode
 * value. src is converted with python_to_icu() and the result is returned
 * through icu_to_python().
 *
 * Returns NULL with a Python exception set on failure:
 *   ValueError  - ICU reported an error (message is the ICU error name)
 *   MemoryError - a buffer could not be allocated
 */
static PyObject *
icu_normalize(PyObject *self, PyObject *args) {
    UErrorCode status = U_ZERO_ERROR;
    int32_t sz = 0, mode = UNORM_DEFAULT, cap = 0, rsz = 0;
    UChar *dest = NULL, *source = NULL, *grown = NULL;
    PyObject *ret = NULL, *src = NULL;

    if (!PyArg_ParseTuple(args, "iO", &mode, &src)) return NULL;
    source = python_to_icu(src, &sz, 1);
    if (source == NULL) goto end;
    // +1 keeps the capacity non-zero for empty input: calloc(0, ...) may
    // legitimately return NULL, and doubling zero would never grow.
    cap = 2 * sz + 1;
    dest = (UChar*) calloc(cap, sizeof(UChar));
    if (dest == NULL) { PyErr_NoMemory(); goto end; }

    while (1) {
        rsz = unorm_normalize(source, sz, (UNormalizationMode)mode, 0, dest, cap, &status);
        if (status == U_BUFFER_OVERFLOW_ERROR) {
            cap *= 2;
            // Keep the old pointer until realloc succeeds so it is still
            // freed at end: on failure.
            grown = (UChar*) realloc(dest, cap*sizeof(UChar));
            if (grown == NULL) { PyErr_NoMemory(); goto end; }
            dest = grown;
            // ICU functions return immediately when handed a failing status,
            // so it must be cleared before retrying.
            status = U_ZERO_ERROR;
            continue;
        }
        break;
    }

    if (U_FAILURE(status)) {
        PyErr_SetString(PyExc_ValueError, u_errorName(status));
        goto end;
    }

    ret = icu_to_python(dest, rsz);

end:
    free(source);
    free(dest);
    return ret;
} // }}}