void CollationThaiTest::TestReordering(void) { const char *tests[] = { "\\u0E41c\\u0301", "=", "\\u0E41\\u0107", // composition "\\u0E41\\uD835\\uDFCE", "<", "\\u0E41\\uD835\\uDFCF", // supplementaries "\\u0E41\\uD834\\uDD5F", "=", "\\u0E41\\uD834\\uDD58\\uD834\\uDD65", // supplementary composition decomps to supplementary "\\u0E41\\uD87E\\uDC02", "=", "\\u0E41\\u4E41", // supplementary composition decomps to BMP "\\u0E41\\u0301", "=", "\\u0E41\\u0301", // unsafe (just checking backwards iteration) "\\u0E41\\u0301\\u0316", "=", "\\u0E41\\u0316\\u0301", // after UCA 4.1, the two lines below are not equal anymore do not have equal sign "\\u0e24\\u0e41", "<", "\\u0e41\\u0e24", // exiting contraction bug "\\u0e3f\\u0e3f\\u0e24\\u0e41", "<", "\\u0e3f\\u0e3f\\u0e41\\u0e24", "abc\\u0E41c\\u0301", "=", "abc\\u0E41\\u0107", // composition "abc\\u0E41\\uD834\\uDC00", "<", "abc\\u0E41\\uD834\\uDC01", // supplementaries "abc\\u0E41\\uD834\\uDD5F", "=", "abc\\u0E41\\uD834\\uDD58\\uD834\\uDD65", // supplementary composition decomps to supplementary "abc\\u0E41\\uD87E\\uDC02", "=", "abc\\u0E41\\u4E41", // supplementary composition decomps to BMP "abc\\u0E41\\u0301", "=", "abc\\u0E41\\u0301", // unsafe (just checking backwards iteration) "abc\\u0E41\\u0301\\u0316", "=", "abc\\u0E41\\u0316\\u0301", "\\u0E41c\\u0301abc", "=", "\\u0E41\\u0107abc", // composition "\\u0E41\\uD834\\uDC00abc", "<", "\\u0E41\\uD834\\uDC01abc", // supplementaries "\\u0E41\\uD834\\uDD5Fabc", "=", "\\u0E41\\uD834\\uDD58\\uD834\\uDD65abc", // supplementary composition decomps to supplementary "\\u0E41\\uD87E\\uDC02abc", "=", "\\u0E41\\u4E41abc", // supplementary composition decomps to BMP "\\u0E41\\u0301abc", "=", "\\u0E41\\u0301abc", // unsafe (just checking backwards iteration) "\\u0E41\\u0301\\u0316abc", "=", "\\u0E41\\u0316\\u0301abc", "abc\\u0E41c\\u0301abc", "=", "abc\\u0E41\\u0107abc", // composition "abc\\u0E41\\uD834\\uDC00abc", "<", "abc\\u0E41\\uD834\\uDC01abc", // supplementaries 
"abc\\u0E41\\uD834\\uDD5Fabc", "=", "abc\\u0E41\\uD834\\uDD58\\uD834\\uDD65abc", // supplementary composition decomps to supplementary "abc\\u0E41\\uD87E\\uDC02abc", "=", "abc\\u0E41\\u4E41abc", // supplementary composition decomps to BMP "abc\\u0E41\\u0301abc", "=", "abc\\u0E41\\u0301abc", // unsafe (just checking backwards iteration) "abc\\u0E41\\u0301\\u0316abc", "=", "abc\\u0E41\\u0316\\u0301abc", }; compareArray(*coll, tests, sizeof(tests)/sizeof(tests[0])); const char *rule = "& c < ab"; const char *testcontraction[] = { "\\u0E41ab", ">", "\\u0E41c"}; // After UCA 4.1 Thai are normal so won't break a contraction UnicodeString rules; UErrorCode status = U_ZERO_ERROR; parseChars(rules, rule); RuleBasedCollator *rcoll = new RuleBasedCollator(rules, status); if(U_SUCCESS(status)) { compareArray(*rcoll, testcontraction, 3); delete rcoll; } else { errln("Couldn't instantiate collator from rules"); } }
void CollationThaiTest::TestReordering(void) { // Until UCA 4.1, the collation code swapped Thai/Lao prevowels with the following consonants, // resulting in consonant+prevowel == prevowel+consonant. // From UCA 5.0 on, there are order-reversing contractions for prevowel+consonant. // From UCA 5.0 until UCA 6.1, there was a tertiary difference between // consonant+prevowel and prevowel+consonant. // In UCA 6.2, they compare equal again. // The test was modified to using a collator with strength=secondary, // ignoring possible tertiary differences. const char *tests[] = { "\\u0E41c\\u0301", "=", "\\u0E41\\u0107", // composition "\\u0E41\\U0001D7CE", "<", "\\u0E41\\U0001D7CF", // supplementaries "\\u0E41\\U0001D15F", "=", "\\u0E41\\U0001D158\\U0001D165", // supplementary composition decomps to supplementary "\\u0E41\\U0002F802", "=", "\\u0E41\\u4E41", // supplementary composition decomps to BMP "\\u0E41\\u0301", "=", "\\u0E41\\u0301", // unsafe (just checking backwards iteration) "\\u0E41\\u0301\\u0316", "=", "\\u0E41\\u0316\\u0301", "\\u0e24\\u0e41", "=", "\\u0e41\\u0e24", // exiting contraction bug "\\u0e3f\\u0e3f\\u0e24\\u0e41", "=", "\\u0e3f\\u0e3f\\u0e41\\u0e24", "abc\\u0E41c\\u0301", "=", "abc\\u0E41\\u0107", // composition "abc\\u0E41\\U0001D000", "<", "abc\\u0E41\\U0001D001", // supplementaries "abc\\u0E41\\U0001D15F", "=", "abc\\u0E41\\U0001D158\\U0001D165", // supplementary composition decomps to supplementary "abc\\u0E41\\U0002F802", "=", "abc\\u0E41\\u4E41", // supplementary composition decomps to BMP "abc\\u0E41\\u0301", "=", "abc\\u0E41\\u0301", // unsafe (just checking backwards iteration) "abc\\u0E41\\u0301\\u0316", "=", "abc\\u0E41\\u0316\\u0301", "\\u0E41c\\u0301abc", "=", "\\u0E41\\u0107abc", // composition "\\u0E41\\U0001D000abc", "<", "\\u0E41\\U0001D001abc", // supplementaries "\\u0E41\\U0001D15Fabc", "=", "\\u0E41\\U0001D158\\U0001D165abc", // supplementary composition decomps to supplementary "\\u0E41\\U0002F802abc", "=", "\\u0E41\\u4E41abc", 
// supplementary composition decomps to BMP "\\u0E41\\u0301abc", "=", "\\u0E41\\u0301abc", // unsafe (just checking backwards iteration) "\\u0E41\\u0301\\u0316abc", "=", "\\u0E41\\u0316\\u0301abc", "abc\\u0E41c\\u0301abc", "=", "abc\\u0E41\\u0107abc", // composition "abc\\u0E41\\U0001D000abc", "<", "abc\\u0E41\\U0001D001abc", // supplementaries "abc\\u0E41\\U0001D15Fabc", "=", "abc\\u0E41\\U0001D158\\U0001D165abc", // supplementary composition decomps to supplementary "abc\\u0E41\\U0002F802abc", "=", "abc\\u0E41\\u4E41abc", // supplementary composition decomps to BMP "abc\\u0E41\\u0301abc", "=", "abc\\u0E41\\u0301abc", // unsafe (just checking backwards iteration) "abc\\u0E41\\u0301\\u0316abc", "=", "abc\\u0E41\\u0316\\u0301abc", }; LocalPointer<Collator> coll2(coll->clone()); UErrorCode status = U_ZERO_ERROR; coll2->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); if(U_FAILURE(status)) { errln("Unable to set the Thai collator clone to secondary strength"); return; } compareArray(*coll2, tests, sizeof(tests)/sizeof(tests[0])); const char *rule = "& c < ab"; const char *testcontraction[] = { "\\u0E41ab", ">", "\\u0E41c"}; // After UCA 4.1 Thai are normal so won't break a contraction UnicodeString rules; parseChars(rules, rule); LocalPointer<RuleBasedCollator> rcoll(new RuleBasedCollator(rules, status)); if(U_SUCCESS(status)) { compareArray(*rcoll, testcontraction, 3); } else { errln("Couldn't instantiate collator from rules"); } }
/**
 * Runs a table of comparison tests against a collator.
 *
 * The table is a flat array of string triples: source, operator, target.
 * The operator is identified by its first character: '<' (LESS),
 * '>' (GREATER) or '=' (EQUAL). Source and target are converted with
 * parseChars() and handed to doTest() together with the expected result.
 * An unrecognized operator is reported through errln() and stops the run.
 *
 * @param c           collator under test
 * @param tests       flat array of (source, operator, target) triples
 * @param testsLength number of entries in tests; should be a multiple of 3
 */
void CollationThaiTest::compareArray(Collator& c,
                                    const char* tests[],
                                    int32_t testsLength) {
    if (testsLength > 0 && testsLength % 3 != 0) {
        // A trailing partial triple cannot be evaluated; report it rather than
        // reading past the end of the array.
        errln("Error: test array length is not a multiple of 3; trailing entries ignored");
    }

    // Require a complete triple (i, i+1, i+2) before touching any of its entries.
    for (int32_t i = 0; i + 2 < testsLength; i += 3) {

        Collator::EComparisonResult expect;
        if (tests[i+1][0] == '<') {
            expect = Collator::LESS;
        } else if (tests[i+1][0] == '>') {
            expect = Collator::GREATER;
        } else if (tests[i+1][0] == '=') {
            expect = Collator::EQUAL;
        } else {
            // expect = Integer.decode(tests[i+1]).intValue();
            errln((UnicodeString)"Error: unknown operator " + tests[i+1]);
            return;
        }

        UnicodeString s1, s2;
        parseChars(s1, tests[i]);
        parseChars(s2, tests[i+2]);

        doTest(&c, s1, s2, expect);
#if 0
        // Disabled hand-written diagnostics, kept for reference; doTest() is used instead.
        UErrorCode status = U_ZERO_ERROR;
        int32_t result = c.compare(s1, s2);
        if (sign(result) != sign(expect))
        {
            UnicodeString t1, t2;
            errln(UnicodeString("") +
                  i/3 + ": compare(" + IntlTest::prettify(s1, t1)
                  + " , " + IntlTest::prettify(s2, t2)
                  + ") got " + result + "; expected " + expect);

            CollationKey k1, k2;
            c.getCollationKey(s1, k1, status);
            c.getCollationKey(s2, k2, status);
            if (U_FAILURE(status)) {
                errln((UnicodeString)"Fail: getCollationKey returned " + u_errorName(status));
                return;
            }
            errln((UnicodeString)" key1: " + prettify(k1, t1) );
            errln((UnicodeString)" key2: " + prettify(k2, t2) );
        }
        else
        {
            // Collator.compare worked OK; now try the collation keys
            CollationKey k1, k2;
            c.getCollationKey(s1, k1, status);
            c.getCollationKey(s2, k2, status);
            if (U_FAILURE(status)) {
                errln((UnicodeString)"Fail: getCollationKey returned " + u_errorName(status));
                return;
            }

            result = k1.compareTo(k2);
            if (sign(result) != sign(expect)) {
                UnicodeString t1, t2;
                errln(UnicodeString("") +
                      i/3 + ": key(" + IntlTest::prettify(s1, t1)
                      + ").compareTo(key(" + IntlTest::prettify(s2, t2)
                      + ")) got " + result + "; expected " + expect);

                errln((UnicodeString)" " + prettify(k1, t1) + " vs. " + prettify(k2, t2));
            }
        }
#endif
    }
}