// @bug 4114077 // // Collation with decomposition off doesn't work for Europe // void CollationRegressionTest::Test4114077(/* char* par */) { // Ensure that we get the same results with decomposition off // as we do with it on.... UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); c->setStrength(Collator::TERTIARY); static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0}, {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0}, {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0}, // a-ring-acute -> a-ring, acute // -> a, ring, acute {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0} // No reordering --> unequal }; c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); compareArray(*c, test1, UPRV_LENGTHOF(test1)); static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} // Reordering --> equal }; c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); compareArray(*c, test2, UPRV_LENGTHOF(test2)); delete c; }
// @bug 4087241 // // string comparison errors in Scandinavian collators // void CollationRegressionTest::Test4087241(/* char* par */) { UErrorCode status = U_ZERO_ERROR; Locale da_DK("da", "DK"); RuleBasedCollator *c = NULL; c = (RuleBasedCollator *) Collator::createInstance(da_DK, status); if (c == NULL || U_FAILURE(status)) { errln("Failed to create collator for da_DK locale"); delete c; return; } c->setStrength(Collator::SECONDARY); static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-umlaut < a-ring {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-umlaut }; compareArray(*c, tests, UPRV_LENGTHOF(tests)); delete c; }
// @bug 4062418 // // Secondary/Tertiary comparison incorrect in French Secondary // void CollationRegressionTest::Test4062418(/* char* par */) { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = NULL; c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status); if (c == NULL || U_FAILURE(status)) { errln("Failed to create collator for Locale::getCanadaFrench()"); delete c; return; } c->setStrength(Collator::SECONDARY); /* String[] tests = { "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater }; */ static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0} }; compareArray(*c, tests, UPRV_LENGTHOF(tests)); delete c; }
void CollationIteratorTest::TestStrengthOrder() { int order = 0x0123ABCD; UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *coll = (RuleBasedCollator *)Collator::createInstance(status); if (coll == NULL || U_FAILURE(status)) { errln("Couldn't create a default collator."); return; } coll->setStrength(Collator::PRIMARY); CollationElementIterator *iter = coll->createCollationElementIterator(test1); if (iter == NULL) { errln("Couldn't create a collation element iterator from default collator"); return; } if (iter->strengthOrder(order) != 0x01230000) { errln("Strength order for a primary strength collator should be the first 2 bytes"); return; } coll->setStrength(Collator::SECONDARY); if (iter->strengthOrder(order) != 0x0123AB00) { errln("Strength order for a secondary strength collator should be the third byte"); return; } coll->setStrength(Collator::TERTIARY); if (iter->strengthOrder(order) != order) { errln("Strength order for a tertiary strength collator should be the third byte"); return; } delete iter; delete coll; }
// @bug 4087243 // // CollationKey takes ignorable strings into account when it shouldn't // void CollationRegressionTest::Test4087243(/* char* par */) { RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); c->setStrength(Collator::TERTIARY); static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1 2 3 = 1 2 3 ctrl-A }; compareArray(*c, tests, ARRAY_LENGTH(tests)); delete c; }
// @bug 4103436 // // Collator::compare not handling spaces properly // void CollationRegressionTest::Test4103436(/* char* par */) { RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); c->setStrength(Collator::TERTIARY); static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}, {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0} }; compareArray(*c, tests, ARRAY_LENGTH(tests)); delete c; }
// @bug 4076676 // // Bad canonicalization of same-class combining characters // void CollationRegressionTest::Test4076676(/* char* par */) { // These combining characters are all in the same class, so they should not // be reordered, and they should compare as unequal. static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0}; static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0}; RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); c->setStrength(Collator::TERTIARY); if (c->compare(s1,s2) == 0) { errln("Same-class combining chars were reordered"); } delete c; }
// @bug 4054736 // // Full Decomposition mode not implemented // void CollationRegressionTest::Test4054736(/* char* par */) { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); c->setStrength(Collator::SECONDARY); c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed }; compareArray(*c, tests, ARRAY_LENGTH(tests)); delete c; }
// @bug 4114076 // // Collation not Unicode conformant with Hangul syllables // void CollationRegressionTest::Test4114076(/* char* par */) { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); c->setStrength(Collator::TERTIARY); // // With Canonical decomposition, Hangul syllables should get decomposed // into Jamo, but Jamo characters should not be decomposed into // conjoining Jamo // static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0} }; c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); compareArray(*c, test1, ARRAY_LENGTH(test1)); // From UTR #15: // *In earlier versions of Unicode, jamo characters like ksf // had compatibility mappings to kf + sf. These mappings were // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.) // That is, the following test is obsolete as of 2.1.9 //obsolete- // With Full decomposition, it should go all the way down to //obsolete- // conjoining Jamo characters. //obsolete- // //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = //obsolete- { //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0} //obsolete- }; //obsolete- //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT); //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2)); delete c; }
// @bug 4066696 // // French secondary collation checking at the end of compare iteration fails // void CollationRegressionTest::Test4066696(/* char* par */) { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = NULL; c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status); if (c == NULL || U_FAILURE(status)) { errln("Failure creating collator for Locale::getCanadaFrench()"); delete c; return; } c->setStrength(Collator::SECONDARY); /* String[] tests = { "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute }; should be: String[] tests = { "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute }; */ static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0} }; compareArray(*c, tests, ARRAY_LENGTH(tests)); delete c; }
// @bug 4081866 // // Combining characters in different classes not reordered properly. // void CollationRegressionTest::Test4081866(/* char* par */) { // These combining characters are all in different classes, // so they should be reordered and the strings should compare as equal. static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0}; static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0}; UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); c->setStrength(Collator::TERTIARY); // Now that the default collators are set to NO_DECOMPOSITION // (as a result of fixing bug 4114077), we must set it explicitly // when we're testing reordering behavior. -- lwerner, 5/5/98 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); if (c->compare(s1,s2) != 0) { errln("Combining chars were not reordered"); } delete c; }
// @bug 4054734 // // Collator::IDENTICAL documented but not implemented // void CollationRegressionTest::Test4054734(/* char* par */) { /* Here's the original Java: String[] decomp = { "\u0001", "<", "\u0002", "\u0001", "=", "\u0001", "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise "\u00C0", "=", "A\u0300" // Decomp should make these equal }; String[] nodecomp = { "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave }; */ static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0x0001, 0}, {0x3c, 0}, {0x0002, 0}, {0x0001, 0}, {0x3d, 0}, {0x0001, 0}, {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0}, {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0} }; UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); c->setStrength(Collator::IDENTICAL); c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); compareArray(*c, decomp, ARRAY_LENGTH(decomp)); delete c; }
// @bug 4060154 // // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" // void CollationRegressionTest::Test4060154(/* char* par */) { UErrorCode status = U_ZERO_ERROR; UnicodeString rules; rules += "&f < g, G < h, H < i, I < j, J"; rules += " & H < "; rules += (UChar)0x0131; rules += ", "; rules += (UChar)0x0130; rules += ", i, I"; RuleBasedCollator *c = NULL; c = new RuleBasedCollator(rules, status); if (c == NULL || U_FAILURE(status)) { errln("failure building collator."); delete c; return; } c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); /* String[] tertiary = { "A", "<", "B", "H", "<", "\u0131", "H", "<", "I", "\u0131", "<", "\u0130", "\u0130", "<", "i", "\u0130", ">", "H", }; */ static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0x41, 0}, {0x3c, 0}, {0x42, 0}, {0x48, 0}, {0x3c, 0}, {0x0131, 0}, {0x48, 0}, {0x3c, 0}, {0x49, 0}, {0x0131, 0}, {0x3c, 0}, {0x0130, 0}, {0x0130, 0}, {0x3c, 0}, {0x69, 0}, {0x0130, 0}, {0x3e, 0}, {0x48, 0} }; c->setStrength(Collator::TERTIARY); compareArray(*c, tertiary, ARRAY_LENGTH(tertiary)); /* String[] secondary = { "H", "<", "I", "\u0131", "=", "\u0130", }; */ static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0x48, 0}, {0x3c, 0}, {0x49, 0}, {0x0131, 0}, {0x3d, 0}, {0x0130, 0} }; c->setStrength(Collator::PRIMARY); compareArray(*c, secondary, ARRAY_LENGTH(secondary)); delete c; }