void UnicodeTest::TestScriptMetadata() { IcuTestErrorCode errorCode(*this, "TestScriptMetadata()"); UnicodeSet rtl("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]", errorCode); // So far, sample characters are uppercase. // Georgian is special. UnicodeSet cased("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]", errorCode); for(int32_t sci = 0; sci < USCRIPT_CODE_LIMIT; ++sci) { UScriptCode sc = (UScriptCode)sci; // Run the test with -v to see which script has failures: // .../intltest$ make && ./intltest utility/UnicodeTest/TestScriptMetadata -v | grep -C 3 FAIL logln(uscript_getShortName(sc)); UScriptUsage usage = uscript_getUsage(sc); UnicodeString sample = uscript_getSampleUnicodeString(sc); UnicodeSet scriptSet; scriptSet.applyIntPropertyValue(UCHAR_SCRIPT, sc, errorCode); if(usage == USCRIPT_USAGE_NOT_ENCODED) { assertTrue("not encoded, no sample", sample.isEmpty()); assertFalse("not encoded, not RTL", uscript_isRightToLeft(sc)); assertFalse("not encoded, not LB letters", uscript_breaksBetweenLetters(sc)); assertFalse("not encoded, not cased", uscript_isCased(sc)); assertTrue("not encoded, no characters", scriptSet.isEmpty()); } else { assertFalse("encoded, has a sample character", sample.isEmpty()); UChar32 firstChar = sample.char32At(0); UScriptCode charScript = getCharScript(sc); assertEquals("script(sample(script))", charScript, uscript_getScript(firstChar, errorCode)); assertEquals("RTL vs. set", rtl.contains(firstChar), uscript_isRightToLeft(sc)); assertEquals("cased vs. set", cased.contains(firstChar), uscript_isCased(sc)); assertEquals("encoded, has characters", sc == charScript, !scriptSet.isEmpty()); if(uscript_isRightToLeft(sc)) { rtl.removeAll(scriptSet); } if(uscript_isCased(sc)) { cased.removeAll(scriptSet); } } } UnicodeString pattern; assertEquals("no remaining RTL characters", UnicodeString("[]"), rtl.toPattern(pattern)); assertEquals("no remaining cased characters", UnicodeString("[]"), cased.toPattern(pattern)); assertTrue("Hani breaks between letters", uscript_breaksBetweenLetters(USCRIPT_HAN)); assertTrue("Thai breaks between letters", uscript_breaksBetweenLetters(USCRIPT_THAI)); assertFalse("Latn does not break between letters", uscript_breaksBetweenLetters(USCRIPT_LATIN)); }
// Implemented here because this calls uloc_addLikelySubtags(). U_CAPI UBool U_EXPORT2 uloc_isRightToLeft(const char *locale) { UErrorCode errorCode = U_ZERO_ERROR; char script[8]; int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode); if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || scriptLength == 0) { // Fastpath: We know the likely scripts and their writing direction // for some common languages. errorCode = U_ZERO_ERROR; char lang[8]; int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode); if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || langLength == 0) { return FALSE; } const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang); if (langPtr != NULL) { switch (langPtr[langLength]) { case '-': return FALSE; case '+': return TRUE; default: break; // partial match of a longer code } } // Otherwise, find the likely script. errorCode = U_ZERO_ERROR; char likely[ULOC_FULLNAME_CAPACITY]; (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode); if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) { return FALSE; } scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode); if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || scriptLength == 0) { return FALSE; } } UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script); return uscript_isRightToLeft(scriptCode); }
void TestScriptMetadataAPI() { /* API & code coverage. More testing in intltest/ucdtest.cpp. */ UErrorCode errorCode=U_ZERO_ERROR; UChar sample[8]; if(uscript_getSampleString(USCRIPT_LATIN, sample, UPRV_LENGTHOF(sample), &errorCode)!=1 || U_FAILURE(errorCode) || uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN || sample[1]!=0) { log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode)); } sample[0]=0xfffe; if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 || errorCode!=U_BUFFER_OVERFLOW_ERROR || sample[0]!=0xfffe) { log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, UPRV_LENGTHOF(sample), &errorCode)!=0 || U_FAILURE(errorCode) || sample[0]!=0) { log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode)); } sample[0]=0xfffe; if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 || errorCode!=U_STRING_NOT_TERMINATED_WARNING || sample[0]!=0xfffe) { log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode)); } if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED || uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_ASPIRATIONAL || uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE || uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED || uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED || uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED || uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) { log_err("uscript_getUsage() failed\n"); } if(uscript_isRightToLeft(USCRIPT_LATIN) || uscript_isRightToLeft(USCRIPT_CIRTH) || !uscript_isRightToLeft(USCRIPT_ARABIC) || !uscript_isRightToLeft(USCRIPT_HEBREW)) { log_err("uscript_isRightToLeft() failed\n"); } if(uscript_breaksBetweenLetters(USCRIPT_LATIN) || uscript_breaksBetweenLetters(USCRIPT_CIRTH) || !uscript_breaksBetweenLetters(USCRIPT_HAN) || !uscript_breaksBetweenLetters(USCRIPT_THAI)) { log_err("uscript_breaksBetweenLetters() failed\n"); } if(uscript_isCased(USCRIPT_CIRTH) || uscript_isCased(USCRIPT_HAN) || !uscript_isCased(USCRIPT_LATIN) || !uscript_isCased(USCRIPT_GREEK)) { log_err("uscript_isCased() failed\n"); } }