void UnicodeTest::TestScriptMetadata() { IcuTestErrorCode errorCode(*this, "TestScriptMetadata()"); UnicodeSet rtl("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]", errorCode); // So far, sample characters are uppercase. // Georgian is special. UnicodeSet cased("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]", errorCode); for(int32_t sci = 0; sci < USCRIPT_CODE_LIMIT; ++sci) { UScriptCode sc = (UScriptCode)sci; // Run the test with -v to see which script has failures: // .../intltest$ make && ./intltest utility/UnicodeTest/TestScriptMetadata -v | grep -C 3 FAIL logln(uscript_getShortName(sc)); UScriptUsage usage = uscript_getUsage(sc); UnicodeString sample = uscript_getSampleUnicodeString(sc); UnicodeSet scriptSet; scriptSet.applyIntPropertyValue(UCHAR_SCRIPT, sc, errorCode); if(usage == USCRIPT_USAGE_NOT_ENCODED) { assertTrue("not encoded, no sample", sample.isEmpty()); assertFalse("not encoded, not RTL", uscript_isRightToLeft(sc)); assertFalse("not encoded, not LB letters", uscript_breaksBetweenLetters(sc)); assertFalse("not encoded, not cased", uscript_isCased(sc)); assertTrue("not encoded, no characters", scriptSet.isEmpty()); } else { assertFalse("encoded, has a sample character", sample.isEmpty()); UChar32 firstChar = sample.char32At(0); UScriptCode charScript = getCharScript(sc); assertEquals("script(sample(script))", charScript, uscript_getScript(firstChar, errorCode)); assertEquals("RTL vs. set", rtl.contains(firstChar), uscript_isRightToLeft(sc)); assertEquals("cased vs. set", cased.contains(firstChar), uscript_isCased(sc)); assertEquals("encoded, has characters", sc == charScript, !scriptSet.isEmpty()); if(uscript_isRightToLeft(sc)) { rtl.removeAll(scriptSet); } if(uscript_isCased(sc)) { cased.removeAll(scriptSet); } } } UnicodeString pattern; assertEquals("no remaining RTL characters", UnicodeString("[]"), rtl.toPattern(pattern)); assertEquals("no remaining cased characters", UnicodeString("[]"), cased.toPattern(pattern)); assertTrue("Hani breaks between letters", uscript_breaksBetweenLetters(USCRIPT_HAN)); assertTrue("Thai breaks between letters", uscript_breaksBetweenLetters(USCRIPT_THAI)); assertFalse("Latn does not break between letters", uscript_breaksBetweenLetters(USCRIPT_LATIN)); }
void TestScriptMetadataAPI() { /* API & code coverage. More testing in intltest/ucdtest.cpp. */ UErrorCode errorCode=U_ZERO_ERROR; UChar sample[8]; if(uscript_getSampleString(USCRIPT_LATIN, sample, UPRV_LENGTHOF(sample), &errorCode)!=1 || U_FAILURE(errorCode) || uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN || sample[1]!=0) { log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode)); } sample[0]=0xfffe; if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 || errorCode!=U_BUFFER_OVERFLOW_ERROR || sample[0]!=0xfffe) { log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, UPRV_LENGTHOF(sample), &errorCode)!=0 || U_FAILURE(errorCode) || sample[0]!=0) { log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode)); } sample[0]=0xfffe; if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 || errorCode!=U_STRING_NOT_TERMINATED_WARNING || sample[0]!=0xfffe) { log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode)); } if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED || uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_ASPIRATIONAL || uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE || uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED || uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED || uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED || uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) { log_err("uscript_getUsage() failed\n"); } if(uscript_isRightToLeft(USCRIPT_LATIN) || uscript_isRightToLeft(USCRIPT_CIRTH) || !uscript_isRightToLeft(USCRIPT_ARABIC) || !uscript_isRightToLeft(USCRIPT_HEBREW)) { log_err("uscript_isRightToLeft() failed\n"); } if(uscript_breaksBetweenLetters(USCRIPT_LATIN) || uscript_breaksBetweenLetters(USCRIPT_CIRTH) || !uscript_breaksBetweenLetters(USCRIPT_HAN) || !uscript_breaksBetweenLetters(USCRIPT_THAI)) { log_err("uscript_breaksBetweenLetters() failed\n"); } if(uscript_isCased(USCRIPT_CIRTH) || uscript_isCased(USCRIPT_HAN) || !uscript_isCased(USCRIPT_LATIN) || !uscript_isCased(USCRIPT_GREEK)) { log_err("uscript_isCased() failed\n"); } }