示例#1
0
void UnicodeTest::TestScriptMetadata() {
    IcuTestErrorCode errorCode(*this, "TestScriptMetadata()");
    UnicodeSet rtl("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]", errorCode);
    // So far, sample characters are uppercase.
    // Georgian is special.
    UnicodeSet cased("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]", errorCode);
    for(int32_t sci = 0; sci < USCRIPT_CODE_LIMIT; ++sci) {
        UScriptCode sc = (UScriptCode)sci;
        // Run the test with -v to see which script has failures:
        // .../intltest$ make && ./intltest utility/UnicodeTest/TestScriptMetadata -v | grep -C 3 FAIL
        logln(uscript_getShortName(sc));
        UScriptUsage usage = uscript_getUsage(sc);
        UnicodeString sample = uscript_getSampleUnicodeString(sc);
        UnicodeSet scriptSet;
        scriptSet.applyIntPropertyValue(UCHAR_SCRIPT, sc, errorCode);
        if(usage == USCRIPT_USAGE_NOT_ENCODED) {
            assertTrue("not encoded, no sample", sample.isEmpty());
            assertFalse("not encoded, not RTL", uscript_isRightToLeft(sc));
            assertFalse("not encoded, not LB letters", uscript_breaksBetweenLetters(sc));
            assertFalse("not encoded, not cased", uscript_isCased(sc));
            assertTrue("not encoded, no characters", scriptSet.isEmpty());
        } else {
            assertFalse("encoded, has a sample character", sample.isEmpty());
            UChar32 firstChar = sample.char32At(0);
            UScriptCode charScript = getCharScript(sc);
            assertEquals("script(sample(script))",
                         charScript, uscript_getScript(firstChar, errorCode));
            assertEquals("RTL vs. set", rtl.contains(firstChar), uscript_isRightToLeft(sc));
            assertEquals("cased vs. set", cased.contains(firstChar), uscript_isCased(sc));
            assertEquals("encoded, has characters", sc == charScript, !scriptSet.isEmpty());
            if(uscript_isRightToLeft(sc)) {
                rtl.removeAll(scriptSet);
            }
            if(uscript_isCased(sc)) {
                cased.removeAll(scriptSet);
            }
        }
    }
    UnicodeString pattern;
    assertEquals("no remaining RTL characters",
                 UnicodeString("[]"), rtl.toPattern(pattern));
    assertEquals("no remaining cased characters",
                 UnicodeString("[]"), cased.toPattern(pattern));

    assertTrue("Hani breaks between letters", uscript_breaksBetweenLetters(USCRIPT_HAN));
    assertTrue("Thai breaks between letters", uscript_breaksBetweenLetters(USCRIPT_THAI));
    assertFalse("Latn does not break between letters", uscript_breaksBetweenLetters(USCRIPT_LATIN));
}
示例#2
0
文件: cucdapi.c 项目: cyrusimap/icu4c
void TestScriptMetadataAPI() {
    /* API & code coverage. More testing in intltest/ucdtest.cpp. */
    UErrorCode errorCode=U_ZERO_ERROR;
    UChar sample[8];

    if(uscript_getSampleString(USCRIPT_LATIN, sample, UPRV_LENGTHOF(sample), &errorCode)!=1 ||
            U_FAILURE(errorCode) ||
            uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
            sample[1]!=0) {
        log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
    }
    sample[0]=0xfffe;
    if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
            errorCode!=U_BUFFER_OVERFLOW_ERROR ||
            sample[0]!=0xfffe) {
        log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
    }
    errorCode=U_ZERO_ERROR;
    if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, UPRV_LENGTHOF(sample), &errorCode)!=0 ||
            U_FAILURE(errorCode) ||
            sample[0]!=0) {
        log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
    }
    sample[0]=0xfffe;
    if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
            errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
            sample[0]!=0xfffe) {
        log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
    }

    if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
            uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_ASPIRATIONAL ||
            uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
            uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
            uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
            uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
            uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
        log_err("uscript_getUsage() failed\n");
    }

    if(uscript_isRightToLeft(USCRIPT_LATIN) ||
            uscript_isRightToLeft(USCRIPT_CIRTH) ||
            !uscript_isRightToLeft(USCRIPT_ARABIC) ||
            !uscript_isRightToLeft(USCRIPT_HEBREW)) {
        log_err("uscript_isRightToLeft() failed\n");
    }

    if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
            uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
            !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
            !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
        log_err("uscript_breaksBetweenLetters() failed\n");
    }

    if(uscript_isCased(USCRIPT_CIRTH) ||
            uscript_isCased(USCRIPT_HAN) ||
            !uscript_isCased(USCRIPT_LATIN) ||
            !uscript_isCased(USCRIPT_GREEK)) {
        log_err("uscript_isCased() failed\n");
    }
}