コード例 #1
0
ファイル: i18n_trans.cpp プロジェクト: djui/i18n
ERL_NIF_TERM trans_ids(ErlNifEnv* env, int argc, 
    const ERL_NIF_TERM /*argv*/[])
{
    ERL_NIF_TERM out;
    UEnumeration* en; 
    UErrorCode status = U_ZERO_ERROR;

    if (argc != 0)
        return enif_make_badarg(env);

    en = utrans_openIDs(&status);   
    CHECK(env, status);

    out = enum_to_term(env, en);
    uenum_close(en);

    return out;
}
コード例 #2
0
ファイル: yaz-icu.c プロジェクト: funkymalc/yaz
static void print_icu_transliterators(const struct config_t *p_config)
{
    UErrorCode status;
    UEnumeration *en = utrans_openIDs(&status);
    int32_t count = uenum_count(en, &status);
    const char *name;
    int32_t length;

    if (p_config->xmloutput)
        fprintf(p_config->outfile, "<transliterators count=\"%d\">\n",  count);
    else
        fprintf(p_config->outfile, "Available ICU transliterators: %d\n", count);

    while ((name = uenum_next(en, &length, &status)))
    {
        if (p_config->xmloutput)
            fprintf(p_config->outfile, "<transliterator id=\"%s\"/>\n", name);
        else
            fprintf(p_config->outfile, "%s\n", name);
    }
    uenum_close(en);
    if (p_config->xmloutput)
        fprintf(p_config->outfile, "</transliterators>\n");
    else
    {
        fprintf(p_config->outfile, "\n\nUnicode Set Patterns:\n"
                "   Pattern         Description\n"
                "   Ranges          [a-z] 	The lower case letters a through z\n"
                "   Named Chars     [abc123] The six characters a,b,c,1,2 and 3\n"
                "   String          [abc{def}] chars a, b and c, and string 'def'\n"
                "   Categories      [\\p{Letter}] Perl General Category 'Letter'.\n"
                "   Categories      [:Letter:] Posix General Category 'Letter'.\n"
                "\n"
                "   Combination     Example\n"
                "   Union           [[:Greek:] [:letter:]]\n"
                "   Intersection    [[:Greek:] & [:letter:]]\n"
                "   Set Complement  [[:Greek:] - [:letter:]]\n"
                "   Complement      [^[:Greek:] [:letter:]]\n"
                "\n"
             "see: http://icu.sourceforge.net/userguide/unicodeSet.html\n"
                "\n"
                "Examples:\n"
                "   [:Punctuation:] Any-Remove\n"
                "   [:Cased-Letter:] Any-Upper\n"
                "   [:Control:] Any-Remove\n"
                "   [:Decimal_Number:] Any-Remove\n"
                "   [:Final_Punctuation:] Any-Remove\n"
                "   [:Georgian:] Any-Upper\n"
                "   [:Katakana:] Any-Remove\n"
                "   [:Arabic:] Any-Remove\n"
                "   [:Punctuation:] Remove\n"
                "   [[:Punctuation:]-[.,]] Remove\n"
                "   [:Line_Separator:] Any-Remove\n"
                "   [:Math_Symbol:] Any-Remove\n"
                "   Lower; [:^Letter:] Remove (word tokenization)\n"
                "   [:^Number:] Remove (numeric tokenization)\n"
                "   [:^Katagana:] Remove (remove everything except Katagana)\n"
                "   Lower;[[:WhiteSpace:][:Punctuation:]] Remove (word tokenization)\n"
                "   NFD; [:Nonspacing Mark:] Remove; NFC   (removes accents from characters)\n"
                "   [A-Za-z]; Lower(); Latin-Katakana; Katakana-Hiragana (transforms latin and katagana to hiragana)\n"
                "   [[:separator:][:start punctuation:][:initial punctuation:]] Remove \n"
                "\n"
                "see http://userguide.icu-project.org/transforms/general\n"
                "    http://www.unicode.org/reports/tr44/\n"
            );


        fprintf(p_config->outfile, "\n\n");

    }
}
コード例 #3
0
ファイル: utransts.c プロジェクト: Strongc/WinObjC
static void TestUnicodeIDs() {
    UEnumeration *uenum;
    UTransliterator *utrans;
    const UChar *id, *id2;
    int32_t idLength, id2Length, count, count2;

    UErrorCode errorCode;

    errorCode=U_ZERO_ERROR;
    uenum=utrans_openIDs(&errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("utrans_openIDs() failed - %s\n", u_errorName(errorCode));
        return;
    }

    count=uenum_count(uenum, &errorCode);
    if(U_FAILURE(errorCode) || count<1) {
        log_err("uenum_count(transliterator IDs)=%d - %s\n", count, u_errorName(errorCode));
    }

    count=0;
    for(;;) {
        id=uenum_unext(uenum, &idLength, &errorCode);
        if(U_FAILURE(errorCode)) {
            log_err("uenum_unext(transliterator ID %d) failed - %s\n", count, u_errorName(errorCode));
            break;
        }
        if(id==NULL) {
            break;
        }

        if(++count>10) {
            /* try to actually open only a few transliterators */
            continue;
        }

        utrans=utrans_openU(id, idLength, UTRANS_FORWARD, NULL, 0, NULL, &errorCode);
        if(U_FAILURE(errorCode)) {
            log_err("utrans_openU(%s) failed - %s\n", aescstrdup(id, idLength), u_errorName(errorCode));
            continue;
        }

        id2=utrans_getUnicodeID(utrans, &id2Length);
        if(idLength!=id2Length || 0!=u_memcmp(id, id2, idLength)) {
            log_err("utrans_getUnicodeID(%s) does not match the original ID\n", aescstrdup(id, idLength));
        }

        utrans_close(utrans);
    }

    uenum_reset(uenum, &errorCode);
    if(U_FAILURE(errorCode) || count<1) {
        log_err("uenum_reset(transliterator IDs) failed - %s\n", u_errorName(errorCode));
    } else {
        count2=uenum_count(uenum, &errorCode);
        if(U_FAILURE(errorCode) || count<1) {
            log_err("2nd uenum_count(transliterator IDs)=%d - %s\n", count2, u_errorName(errorCode));
        } else if(count!=count2) {
            log_err("uenum_unext(transliterator IDs) returned %d IDs but uenum_count() after uenum_reset() claims there are %d\n", count, count2);
        }
    }

    uenum_close(uenum);
}
コード例 #4
0
static inline UTransliterator *utrans_find(CFStringRef transform, UTransDirection dir, UErrorCode *error) {
    UEnumeration *uenum = NULL;
    UTransliterator *trans = NULL;
    do {
        uenum = utrans_openIDs(error);
        if (U_FAILURE(*error)) {
            DEBUG_LOG("%s", u_errorName(*error));
            break;
        }

        int32_t count = uenum_count(uenum, error);
        if (U_FAILURE(*error)) {
            DEBUG_LOG("%s", u_errorName(*error));
            break;
        }
        int32_t trans_idx = 0;
        while (trans_idx < count && trans == NULL) {
            int32_t idLen = 0;
            const UChar *uid = uenum_unext(uenum, &idLen, error);
            if (U_FAILURE(*error)) {
                DEBUG_LOG("%s", u_errorName(*error));
                break;
            }
            // this seems rather unlikely since we should have already broken
            // by the trans_idx exceeding the count
            if (uid == NULL) {
                break;
            }

            CFStringRef name = CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault, uid, idLen, kCFAllocatorNull);
            // It would have been nice if these stirng constants were actually defined somewhere in icu, but sadly they are runtime metadata...
            if ((CFEqual(name, CFSTR("Any-Remove")) && CFEqual(transform, kCFStringTransformStripCombiningMarks)) ||
                (CFEqual(name, CFSTR("Any-Latin")) && CFEqual(transform, kCFStringTransformToLatin)) ||
                (CFEqual(name, CFSTR("Latin-Katakana")) && CFEqual(transform, kCFStringTransformLatinKatakana)) ||
                (CFEqual(name, CFSTR("Latin-Hiragana")) && CFEqual(transform, kCFStringTransformLatinHiragana)) ||
                (CFEqual(name, CFSTR("Hiragana-Katakana")) && CFEqual(transform, kCFStringTransformHiraganaKatakana)) ||
                (CFEqual(name, CFSTR("Latin-Hangul")) && CFEqual(transform, kCFStringTransformLatinHangul)) ||
                (CFEqual(name, CFSTR("Latin-Arabic")) && CFEqual(transform, kCFStringTransformLatinArabic)) ||
                (CFEqual(name, CFSTR("Latin-Hebrew")) && CFEqual(transform, kCFStringTransformLatinHebrew)) ||
                (CFEqual(name, CFSTR("Latin-Thai")) && CFEqual(transform, kCFStringTransformLatinThai)) ||
                (CFEqual(name, CFSTR("Latin-Cyrillic")) && CFEqual(transform, kCFStringTransformLatinCyrillic)) ||
                (CFEqual(name, CFSTR("Latin-Greek")) && CFEqual(transform, kCFStringTransformLatinGreek)) ||
                (CFEqual(name, CFSTR("Any-Hex/XML")) && CFEqual(transform, kCFStringTransformToXMLHex)) ||
                (CFEqual(name, CFSTR("Any-Name")) && CFEqual(transform, kCFStringTransformToUnicodeName)) ||
                (CFEqual(name, CFSTR("Accents-Any")) && CFEqual(transform, kCFStringTransformStripDiacritics))) {
                trans = utrans_openU(uid, idLen, dir, NULL, 0, NULL, error);
            }
            CFRelease(name);
            trans_idx++;
        }
    } while (0);

    if (uenum != NULL) {
        uenum_reset(uenum, error);
        uenum_close(uenum);
    }

    if (trans == NULL && (CFEqual(transform, kCFStringTransformStripCombiningMarks) ||
                          CFEqual(transform, kCFStringTransformToLatin) ||
                          CFEqual(transform, kCFStringTransformLatinKatakana) ||
                          CFEqual(transform, kCFStringTransformLatinHiragana) ||
                          CFEqual(transform, kCFStringTransformHiraganaKatakana) ||
                          CFEqual(transform, kCFStringTransformLatinHangul) ||
                          CFEqual(transform, kCFStringTransformLatinArabic) ||
                          CFEqual(transform, kCFStringTransformLatinHebrew) ||
                          CFEqual(transform, kCFStringTransformLatinCyrillic) ||
                          CFEqual(transform, kCFStringTransformLatinGreek) ||
                          CFEqual(transform, kCFStringTransformToXMLHex) ||
                          CFEqual(transform, kCFStringTransformToUnicodeName) ||
                          CFEqual(transform, kCFStringTransformStripDiacritics))) {
        static dispatch_once_t once = 0L;
        dispatch_once(&once, ^{
            RELEASE_LOG("Unable to find transliterators in icu data: likely this is from not including the Transliterators section in building your icu.dat file");
        });
    }