コード例 #1
0
void UnicodeTest::TestScriptMetadata() {
    IcuTestErrorCode errorCode(*this, "TestScriptMetadata()");
    UnicodeSet rtl("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]", errorCode);
    // So far, sample characters are uppercase.
    // Georgian is special.
    UnicodeSet cased("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]", errorCode);
    for(int32_t sci = 0; sci < USCRIPT_CODE_LIMIT; ++sci) {
        UScriptCode sc = (UScriptCode)sci;
        // Run the test with -v to see which script has failures:
        // .../intltest$ make && ./intltest utility/UnicodeTest/TestScriptMetadata -v | grep -C 3 FAIL
        logln(uscript_getShortName(sc));
        UScriptUsage usage = uscript_getUsage(sc);
        UnicodeString sample = uscript_getSampleUnicodeString(sc);
        UnicodeSet scriptSet;
        scriptSet.applyIntPropertyValue(UCHAR_SCRIPT, sc, errorCode);
        if(usage == USCRIPT_USAGE_NOT_ENCODED) {
            assertTrue("not encoded, no sample", sample.isEmpty());
            assertFalse("not encoded, not RTL", uscript_isRightToLeft(sc));
            assertFalse("not encoded, not LB letters", uscript_breaksBetweenLetters(sc));
            assertFalse("not encoded, not cased", uscript_isCased(sc));
            assertTrue("not encoded, no characters", scriptSet.isEmpty());
        } else {
            assertFalse("encoded, has a sample character", sample.isEmpty());
            UChar32 firstChar = sample.char32At(0);
            UScriptCode charScript = getCharScript(sc);
            assertEquals("script(sample(script))",
                         charScript, uscript_getScript(firstChar, errorCode));
            assertEquals("RTL vs. set", rtl.contains(firstChar), uscript_isRightToLeft(sc));
            assertEquals("cased vs. set", cased.contains(firstChar), uscript_isCased(sc));
            assertEquals("encoded, has characters", sc == charScript, !scriptSet.isEmpty());
            if(uscript_isRightToLeft(sc)) {
                rtl.removeAll(scriptSet);
            }
            if(uscript_isCased(sc)) {
                cased.removeAll(scriptSet);
            }
        }
    }
    UnicodeString pattern;
    assertEquals("no remaining RTL characters",
                 UnicodeString("[]"), rtl.toPattern(pattern));
    assertEquals("no remaining cased characters",
                 UnicodeString("[]"), cased.toPattern(pattern));

    assertTrue("Hani breaks between letters", uscript_breaksBetweenLetters(USCRIPT_HAN));
    assertTrue("Thai breaks between letters", uscript_breaksBetweenLetters(USCRIPT_THAI));
    assertFalse("Latn does not break between letters", uscript_breaksBetweenLetters(USCRIPT_LATIN));
}
コード例 #2
0
ファイル: loclikely.cpp プロジェクト: icu-project/icu4c
// Implemented here because this calls uloc_addLikelySubtags().
/**
 * Reports whether the script of a locale is written right-to-left.
 *
 * Resolution order:
 *  1. If the locale ID carries an explicit script subtag, that script decides.
 *  2. Otherwise, a non-empty language is looked up in LANG_DIR_STRING
 *     (defined elsewhere in this file), where a language code is followed by
 *     '+' for right-to-left or '-' for left-to-right.
 *  3. Otherwise the script is taken from uloc_addLikelySubtags().
 *
 * @param locale the locale ID to examine
 * @return TRUE if the resolved script is right-to-left; FALSE otherwise,
 *         including when a subtag cannot be extracted or no script can be
 *         determined.
 */
U_CAPI UBool U_EXPORT2
uloc_isRightToLeft(const char *locale) {
    UErrorCode errorCode = U_ZERO_ERROR;
    char script[8];
    int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
    if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
            scriptLength == 0) {
        // Fastpath: We know the likely scripts and their writing direction
        // for some common languages.
        errorCode = U_ZERO_ERROR;
        char lang[8];
        int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
            return FALSE;
        }
        // An empty language is not an error: a locale ID may specify only a
        // region, and its likely script can still be right-to-left. Skip the
        // table lookup in that case (an empty needle would match at the start
        // of the table) and let the likely-subtags step below decide.
        if (langLength > 0) {
            const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
            if (langPtr != NULL) {
                switch (langPtr[langLength]) {
                case '-': return FALSE;
                case '+': return TRUE;
                default: break;  // partial match of a longer code
                }
            }
        }
        // Otherwise, find the likely script.
        errorCode = U_ZERO_ERROR;
        char likely[ULOC_FULLNAME_CAPACITY];
        (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode);
        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
            return FALSE;
        }
        scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode);
        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
                scriptLength == 0) {
            return FALSE;
        }
    }
    // Map the script subtag to its UScriptCode and ask the script metadata.
    UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
    return uscript_isRightToLeft(scriptCode);
}
コード例 #3
0
ファイル: cucdapi.c プロジェクト: cyrusimap/icu4c
void TestScriptMetadataAPI() {
    /* API & code coverage. More testing in intltest/ucdtest.cpp. */
    UErrorCode errorCode=U_ZERO_ERROR;
    UChar sample[8];

    if(uscript_getSampleString(USCRIPT_LATIN, sample, UPRV_LENGTHOF(sample), &errorCode)!=1 ||
            U_FAILURE(errorCode) ||
            uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
            sample[1]!=0) {
        log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
    }
    sample[0]=0xfffe;
    if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
            errorCode!=U_BUFFER_OVERFLOW_ERROR ||
            sample[0]!=0xfffe) {
        log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
    }
    errorCode=U_ZERO_ERROR;
    if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, UPRV_LENGTHOF(sample), &errorCode)!=0 ||
            U_FAILURE(errorCode) ||
            sample[0]!=0) {
        log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
    }
    sample[0]=0xfffe;
    if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
            errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
            sample[0]!=0xfffe) {
        log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
    }

    if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
            uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_ASPIRATIONAL ||
            uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
            uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
            uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
            uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
            uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
        log_err("uscript_getUsage() failed\n");
    }

    if(uscript_isRightToLeft(USCRIPT_LATIN) ||
            uscript_isRightToLeft(USCRIPT_CIRTH) ||
            !uscript_isRightToLeft(USCRIPT_ARABIC) ||
            !uscript_isRightToLeft(USCRIPT_HEBREW)) {
        log_err("uscript_isRightToLeft() failed\n");
    }

    if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
            uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
            !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
            !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
        log_err("uscript_breaksBetweenLetters() failed\n");
    }

    if(uscript_isCased(USCRIPT_CIRTH) ||
            uscript_isCased(USCRIPT_HAN) ||
            !uscript_isCased(USCRIPT_LATIN) ||
            !uscript_isCased(USCRIPT_GREEK)) {
        log_err("uscript_isCased() failed\n");
    }
}