/**
 * Returns the collator to use for lenient parsing.  The collator is lazily created:
 * this function creates it the first time it's called.
 * @return The collator to use for lenient parsing, or null if lenient parsing
 * is turned off (or if the collator could not be created).
 */
Collator*
RuleBasedNumberFormat::getCollator() const
{
#if !UCONFIG_NO_COLLATION
    if (!ruleSets) {
        return NULL;
    }

    // lazy-evaluate the collator
    if (collator == NULL && lenient) {
        // create a default collator based on the formatter's locale,
        // then pull out that collator's rules, append any additional
        // rules specified in the description, and create a _new_
        // collator based on the combination of those rules
        UErrorCode status = U_ZERO_ERROR;

        Collator* temp = Collator::createInstance(locale, status);
        RuleBasedCollator* newCollator;
        if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
            if (lenientParseRules) {
                UnicodeString rules(newCollator->getRules());
                rules.append(*lenientParseRules);

                newCollator = new RuleBasedCollator(rules, status);
                // Exit if newCollator could not be created.
                if (newCollator == NULL) {
                    // The locale collator is still owned by us at this point;
                    // release it so the early exit does not leak it.
                    delete temp;
                    return NULL;
                }
            } else {
                // newCollator aliases temp here; clear temp so the
                // unconditional delete below does not free the object
                // that is about to be installed (or deleted) as newCollator.
                temp = NULL;
            }
            if (U_SUCCESS(status)) {
                newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
                // cast away const: the collator is a lazily-filled cache
                const_cast<RuleBasedNumberFormat*>(this)->collator = newCollator;
            } else {
                delete newCollator;
            }
        }
        delete temp;
    }
#endif

    // if lenient-parse mode is off, this will be null
    // (see setLenientParseMode())
    return collator;
}
// @bug 4054736 // // Full Decomposition mode not implemented // void CollationRegressionTest::Test4054736(/* char* par */) { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); c->setStrength(Collator::SECONDARY); c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed }; compareArray(*c, tests, ARRAY_LENGTH(tests)); delete c; }
// @bug 4076676 // // Bad canonicalization of same-class combining characters // void CollationRegressionTest::Test4076676(/* char* par */) { // These combining characters are all in the same class, so they should not // be reordered, and they should compare as unequal. static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0}; static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0}; RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); c->setStrength(Collator::TERTIARY); if (c->compare(s1,s2) == 0) { errln("Same-class combining chars were reordered"); } delete c; }
/**
 * Opens a collator from a binary image.
 *
 * Returns NULL without doing anything if *status already indicates a
 * failure.  Sets *status to U_MEMORY_ALLOCATION_ERROR and returns NULL if
 * the collator object cannot be allocated, and returns NULL if the
 * RuleBasedCollator constructor reports a failure through *status.
 */
U_CAPI UCollator* U_EXPORT2
ucol_openBinary(const uint8_t *bin, int32_t length,
                const UCollator *base,
                UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return NULL;
    }

    RuleBasedCollator *rbc = new RuleBasedCollator(
            bin, length,
            RuleBasedCollator::rbcFromUCollator(base),
            *status);

    if (rbc == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
    } else if (U_FAILURE(*status)) {
        // Construction failed: discard the half-built object.
        delete rbc;
        rbc = NULL;
    }

    return (rbc == NULL) ? NULL : rbc->toUCollator();
}
void CollationIteratorTest::TestStrengthOrder() { int order = 0x0123ABCD; UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *coll = (RuleBasedCollator *)Collator::createInstance(status); if (coll == NULL || U_FAILURE(status)) { errln("Couldn't create a default collator."); return; } coll->setStrength(Collator::PRIMARY); CollationElementIterator *iter = coll->createCollationElementIterator(test1); if (iter == NULL) { errln("Couldn't create a collation element iterator from default collator"); return; } if (iter->strengthOrder(order) != 0x01230000) { errln("Strength order for a primary strength collator should be the first 2 bytes"); return; } coll->setStrength(Collator::SECONDARY); if (iter->strengthOrder(order) != 0x0123AB00) { errln("Strength order for a secondary strength collator should be the third byte"); return; } coll->setStrength(Collator::TERTIARY); if (iter->strengthOrder(order) != order) { errln("Strength order for a tertiary strength collator should be the third byte"); return; } delete iter; delete coll; }
// @bug 4078588 // // RuleBasedCollator breaks on "< a < bb" rule // void CollationRegressionTest::Test4078588(/* char *par */) { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status); if (rbc == NULL || U_FAILURE(status)) { errln("Failed to create RuleBasedCollator."); delete rbc; return; } Collator::EComparisonResult result = rbc->compare("a","bb"); if (result != Collator::LESS) { errln((UnicodeString)"Compare(a,bb) returned " + (int)result + (UnicodeString)"; expected -1"); } delete rbc; }
// @bug 4066696 // // French secondary collation checking at the end of compare iteration fails // void CollationRegressionTest::Test4066696(/* char* par */) { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = NULL; c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status); if (c == NULL || U_FAILURE(status)) { errln("Failure creating collator for Locale::getCanadaFrench()"); delete c; return; } c->setStrength(Collator::SECONDARY); /* String[] tests = { "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute }; should be: String[] tests = { "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute }; */ static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0} }; compareArray(*c, tests, ARRAY_LENGTH(tests)); delete c; }
// @bug 4114076 // // Collation not Unicode conformant with Hangul syllables // void CollationRegressionTest::Test4114076(/* char* par */) { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); c->setStrength(Collator::TERTIARY); // // With Canonical decomposition, Hangul syllables should get decomposed // into Jamo, but Jamo characters should not be decomposed into // conjoining Jamo // static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0} }; c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); compareArray(*c, test1, ARRAY_LENGTH(test1)); // From UTR #15: // *In earlier versions of Unicode, jamo characters like ksf // had compatibility mappings to kf + sf. These mappings were // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.) // That is, the following test is obsolete as of 2.1.9 //obsolete- // With Full decomposition, it should go all the way down to //obsolete- // conjoining Jamo characters. //obsolete- // //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = //obsolete- { //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0} //obsolete- }; //obsolete- //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT); //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2)); delete c; }
// @bug 4081866 // // Combining characters in different classes not reordered properly. // void CollationRegressionTest::Test4081866(/* char* par */) { // These combining characters are all in different classes, // so they should be reordered and the strings should compare as equal. static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0}; static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0}; UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); c->setStrength(Collator::TERTIARY); // Now that the default collators are set to NO_DECOMPOSITION // (as a result of fixing bug 4114077), we must set it explicitly // when we're testing reordering behavior. -- lwerner, 5/5/98 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); if (c->compare(s1,s2) != 0) { errln("Combining chars were not reordered"); } delete c; }
// @bug 4059820 // // RuleBasedCollator.getRules does not return the exact pattern as input // for expanding character sequences // void CollationRegressionTest::Test4059820(/* char* par */) { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = NULL; UnicodeString rules = "&9 < a < b , c/a < d < z"; c = new RuleBasedCollator(rules, status); if (c == NULL || U_FAILURE(status)) { errln("Failure building a collator."); delete c; return; } if ( c->getRules().indexOf("c/a") == -1) { errln("returned rules do not contain 'c/a'"); } delete c; }
void CollationIteratorTest::TestMaxExpansion(/* char* par */) { UErrorCode status = U_ZERO_ERROR; UnicodeString rule("&a < ab < c/aba < d < z < ch"); RuleBasedCollator *coll = new RuleBasedCollator(rule, status); UChar ch = 0; UnicodeString str(ch); CollationElementIterator *iter = coll->createCollationElementIterator(str); while (ch < 0xFFFF && U_SUCCESS(status)) { int count = 1; uint32_t order; ch ++; UnicodeString str(ch); iter->setText(str, status); order = iter->previous(status); /* thai management */ if (CollationElementIterator::isIgnorable(order)) order = iter->previous(status); while (U_SUCCESS(status) && iter->previous(status) != (int32_t)UCOL_NULLORDER) { count ++; } if (U_FAILURE(status) && iter->getMaxExpansion(order) < count) { errln("Failure at codepoint %d, maximum expansion count < %d\n", ch, count); } } delete iter; delete coll; }
// @bug 4054734 // // Collator::IDENTICAL documented but not implemented // void CollationRegressionTest::Test4054734(/* char* par */) { /* Here's the original Java: String[] decomp = { "\u0001", "<", "\u0002", "\u0001", "=", "\u0001", "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise "\u00C0", "=", "A\u0300" // Decomp should make these equal }; String[] nodecomp = { "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave }; */ static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0x0001, 0}, {0x3c, 0}, {0x0002, 0}, {0x0001, 0}, {0x3d, 0}, {0x0001, 0}, {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0}, {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0} }; UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); c->setStrength(Collator::IDENTICAL); c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); compareArray(*c, decomp, ARRAY_LENGTH(decomp)); delete c; }
void CollationIteratorTest::TestClearBuffers(/* char* par */) { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = new RuleBasedCollator((UnicodeString)"&a < b < c & ab = d", status); if (c == NULL || U_FAILURE(status)) { errln("Couldn't create a RuleBasedCollator."); delete c; return; } UnicodeString source("abcd"); CollationElementIterator *i = c->createCollationElementIterator(source); int32_t e0 = i->next(status); // save the first collation element if (U_FAILURE(status)) { errln("call to i->next() failed. err=%s", u_errorName(status)); } else { i->setOffset(3, status); // go to the expanding character if (U_FAILURE(status)) { errln("call to i->setOffset(3) failed. err=%s", u_errorName(status)); } else { i->next(status); // but only use up half of it if (U_FAILURE(status)) { errln("call to i->next() failed. err=%s", u_errorName(status)); } else { i->setOffset(0, status); // go back to the beginning if (U_FAILURE(status)) { errln("call to i->setOffset(0) failed. err=%s", u_errorName(status)); } else { int32_t e = i->next(status); // and get this one again if (U_FAILURE(status)) { errln("call to i->next() failed. err=%s", u_errorName(status)); } else if (e != e0) { errln("got 0x%X, expected 0x%X", e, e0); } } } } } delete i; delete c; }
void SSearchTest::offsetTest() { const char *test[] = { // The sequence \u0FB3\u0F71\u0F71\u0F80 contains a discontiguous // contraction (\u0FB3\u0F71\u0F80) logically followed by \u0F71. "\\u1E33\\u0FB3\\u0F71\\u0F71\\u0F80\\uD835\\uDF6C\\u01B0", "\\ua191\\u16ef\\u2036\\u017a", #if 0 // This results in a complex interaction between contraction, // expansion and normalization that confuses the backwards offset fixups. "\\u0F7F\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85", #endif "\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85", "\\u07E9\\u07EA\\u07F1\\u07F2\\u07F3", "\\u02FE\\u02FF" "\\u0300\\u0301\\u0302\\u0303\\u0304\\u0305\\u0306\\u0307\\u0308\\u0309\\u030A\\u030B\\u030C\\u030D\\u030E\\u030F" "\\u0310\\u0311\\u0312\\u0313\\u0314\\u0315\\u0316\\u0317\\u0318\\u0319\\u031A\\u031B\\u031C\\u031D\\u031E\\u031F" "\\u0320\\u0321\\u0322\\u0323\\u0324\\u0325\\u0326\\u0327\\u0328\\u0329\\u032A\\u032B\\u032C\\u032D\\u032E\\u032F" "\\u0330\\u0331\\u0332\\u0333\\u0334\\u0335\\u0336\\u0337\\u0338\\u0339\\u033A\\u033B\\u033C\\u033D\\u033E\\u033F" "\\u0340\\u0341\\u0342\\u0343\\u0344\\u0345\\u0346\\u0347\\u0348\\u0349\\u034A\\u034B\\u034C\\u034D\\u034E", // currently not working, see #8081 "\\u02FE\\u02FF\\u0300\\u0301\\u0302\\u0303\\u0316\\u0317\\u0318", // currently not working, see #8081 "a\\u02FF\\u0301\\u0316", // currently not working, see #8081 "a\\u02FF\\u0316\\u0301", "a\\u0430\\u0301\\u0316", "a\\u0430\\u0316\\u0301", "abc\\u0E41\\u0301\\u0316", "abc\\u0E41\\u0316\\u0301", "\\u0E41\\u0301\\u0316", "\\u0E41\\u0316\\u0301", "a\\u0301\\u0316", "a\\u0316\\u0301", "\\uAC52\\uAC53", "\\u34CA\\u34CB", "\\u11ED\\u11EE", "\\u30C3\\u30D0", "p\\u00E9ch\\u00E9", "a\\u0301\\u0325", "a\\u0300\\u0325", "a\\u0325\\u0300", "A\\u0323\\u0300B", "A\\u0300\\u0323B", "A\\u0301\\u0323B", "A\\u0302\\u0301\\u0323B", "abc", "ab\\u0300c", "ab\\u0300\\u0323c", " \\uD800\\uDC00\\uDC00", "a\\uD800\\uDC00\\uDC00", "A\\u0301\\u0301", "A\\u0301\\u0323", "A\\u0301\\u0323B", "B\\u0301\\u0323C", 
"A\\u0300\\u0323B", "\\u0301A\\u0301\\u0301", "abcd\\r\\u0301", "p\\u00EAche", "pe\\u0302che", }; int32_t testCount = ARRAY_SIZE(test); UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *col = (RuleBasedCollator *) Collator::createInstance(Locale::getEnglish(), status); if (U_FAILURE(status)) { errcheckln(status, "Failed to create collator in offsetTest! - %s", u_errorName(status)); return; } char buffer[4096]; // A bit of a hack... just happens to be long enough for all the test cases... // We could allocate one that's the right size by (CE_count * 10) + 2 // 10 chars is enough room for 8 hex digits plus ", ". 2 extra chars for "[" and "]" col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); for(int32_t i = 0; i < testCount; i += 1) { if (i>=4 && i<=6 && logKnownIssue("9156", "was 8081")) { continue; // timebomb until ticket #9156 (was #8081) is resolved } UnicodeString ts = CharsToUnicodeString(test[i]); CollationElementIterator *iter = col->createCollationElementIterator(ts); OrderList forwardList; OrderList backwardList; int32_t order, low, high; do { low = iter->getOffset(); order = iter->next(status); high = iter->getOffset(); forwardList.add(order, low, high); } while (order != CollationElementIterator::NULLORDER); iter->reset(); iter->setOffset(ts.length(), status); backwardList.add(CollationElementIterator::NULLORDER, iter->getOffset(), iter->getOffset()); do { high = iter->getOffset(); order = iter->previous(status); low = iter->getOffset(); if (order == CollationElementIterator::NULLORDER) { break; } backwardList.add(order, low, high); } while (TRUE); backwardList.reverse(); if (forwardList.compare(backwardList)) { logln("Works with \"%s\"", test[i]); logln("Forward offsets: [%s]", printOffsets(buffer, forwardList)); // logln("Backward offsets: [%s]", printOffsets(buffer, backwardList)); logln("Forward CEs: [%s]", printOrders(buffer, forwardList)); // logln("Backward CEs: [%s]", printOrders(buffer, backwardList)); logln(); } else { errln("Fails 
with \"%s\"", test[i]); infoln("Forward offsets: [%s]", printOffsets(buffer, forwardList)); infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList)); infoln("Forward CEs: [%s]", printOrders(buffer, forwardList)); infoln("Backward CEs: [%s]", printOrders(buffer, backwardList)); infoln(); } delete iter; } delete col; }
void CollationIteratorTest::TestAssignment() { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *coll = (RuleBasedCollator *)Collator::createInstance(status); if (coll == NULL || U_FAILURE(status)) { errln("Couldn't create a default collator."); return; } UnicodeString source("abcd"); CollationElementIterator *iter1 = coll->createCollationElementIterator(source); CollationElementIterator iter2 = *iter1; if (*iter1 != iter2) { errln("Fail collation iterator assignment does not produce the same elements"); } CollationElementIterator iter3(*iter1); if (*iter1 != iter3) { errln("Fail collation iterator copy constructor does not produce the same elements"); } source = CharsToUnicodeString("a\\u0300\\u0325"); coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); CollationElementIterator *iter4 = coll->createCollationElementIterator(source); CollationElementIterator iter5(*iter4); if (*iter4 != iter5) { errln("collation iterator assignment does not produce the same elements"); } iter4->next(status); if (U_FAILURE(status) || *iter4 == iter5) { errln("collation iterator not equal"); } iter5.next(status); if (U_FAILURE(status) || *iter4 != iter5) { errln("collation iterator equal"); } iter4->next(status); if (U_FAILURE(status) || *iter4 == iter5) { errln("collation iterator not equal"); } iter5.next(status); if (U_FAILURE(status) || *iter4 != iter5) { errln("collation iterator equal"); } CollationElementIterator iter6(*iter4); if (*iter4 != iter6) { errln("collation iterator equal"); } iter4->next(status); if (U_FAILURE(status) || *iter4 == iter5) { errln("collation iterator not equal"); } iter5.next(status); if (U_FAILURE(status) || *iter4 != iter5) { errln("collation iterator equal"); } iter4->next(status); if (U_FAILURE(status) || *iter4 == iter5) { errln("collation iterator not equal"); } iter5.next(status); if (U_FAILURE(status) || *iter4 != iter5) { errln("collation iterator equal"); } delete iter1; delete iter4; delete coll; }
// // First characters in scripts. // Create a UVector whose contents are pointers to UnicodeStrings for the First Characters in each script. // The vector is sorted according to this index's collation. // // This code is too slow to use, so for now hard code the data. // Hard coded implementation is follows. // UVector *AlphabeticIndex::firstStringsInScript(Collator *ruleBasedCollator, UErrorCode &status) { if (U_FAILURE(status)) { return NULL; } UnicodeString results[USCRIPT_CODE_LIMIT]; UnicodeString LOWER_A = UNICODE_STRING_SIMPLE("a"); UnicodeSetIterator siter(*TO_TRY); while (siter.next()) { const UnicodeString ¤t = siter.getString(); Collator::EComparisonResult r = ruleBasedCollator->compare(current, LOWER_A); if (r < 0) { // TODO fix; we only want "real" script characters, not // symbols. continue; } int script = uscript_getScript(current.char32At(0), &status); if (results[script].length() == 0) { results[script] = current; } else if (ruleBasedCollator->compare(current, results[script]) < 0) { results[script] = current; } } UnicodeSet extras; UnicodeSet expansions; RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(ruleBasedCollator); const UCollator *uRuleBasedCollator = rbc->getUCollator(); ucol_getContractionsAndExpansions(uRuleBasedCollator, extras.toUSet(), expansions.toUSet(), true, &status); extras.addAll(expansions).removeAll(*TO_TRY); if (extras.size() != 0) { const Normalizer2 *normalizer = Normalizer2::getNFKCInstance(status); UnicodeSetIterator extrasIter(extras); while (extrasIter.next()) { const UnicodeString ¤t = extrasIter.next(); if (!TO_TRY->containsAll(current)) continue; if (!normalizer->isNormalized(current, status) || ruleBasedCollator->compare(current, LOWER_A) < 0) { continue; } int script = uscript_getScript(current.char32At(0), &status); if (results[script].length() == 0) { results[script] = current; } else if (ruleBasedCollator->compare(current, results[script]) < 0) { results[script] = current; } } } UVector *dest = new 
UVector(status); dest->setDeleter(uprv_deleteUObject); for (uint32_t i = 0; i < sizeof(results) / sizeof(results[0]); ++i) { if (results[i].length() > 0) { dest->addElement(results[i].clone(), status); } } dest->sortWithUComparator(sortCollateComparator, ruleBasedCollator, status); return dest; }
void CollationCurrencyTest::currencyTest(/*char *par*/) { // All the currency symbols, in collation order static const UChar currency[][2] = { { 0x00A4, 0x0000}, /*00A4; L; [14 36, 03, 03] # [082B.0020.0002] # CURRENCY SIGN*/ { 0x00A2, 0x0000}, /*00A2; L; [14 38, 03, 03] # [082C.0020.0002] # CENT SIGN*/ { 0xFFE0, 0x0000}, /*FFE0; L; [14 38, 03, 05] # [082C.0020.0003] # FULLWIDTH CENT SIGN*/ { 0x0024, 0x0000}, /*0024; L; [14 3A, 03, 03] # [082D.0020.0002] # DOLLAR SIGN*/ { 0xFF04, 0x0000}, /*FF04; L; [14 3A, 03, 05] # [082D.0020.0003] # FULLWIDTH DOLLAR SIGN*/ { 0xFE69, 0x0000}, /*FE69; L; [14 3A, 03, 1D] # [082D.0020.000F] # SMALL DOLLAR SIGN*/ { 0x00A3, 0x0000}, /*00A3; L; [14 3C, 03, 03] # [082E.0020.0002] # POUND SIGN*/ { 0xFFE1, 0x0000}, /*FFE1; L; [14 3C, 03, 05] # [082E.0020.0003] # FULLWIDTH POUND SIGN*/ { 0x00A5, 0x0000}, /*00A5; L; [14 3E, 03, 03] # [082F.0020.0002] # YEN SIGN*/ { 0xFFE5, 0x0000}, /*FFE5; L; [14 3E, 03, 05] # [082F.0020.0003] # FULLWIDTH YEN SIGN*/ { 0x09F2, 0x0000}, /*09F2; L; [14 40, 03, 03] # [0830.0020.0002] # BENGALI RUPEE MARK*/ { 0x09F3, 0x0000}, /*09F3; L; [14 42, 03, 03] # [0831.0020.0002] # BENGALI RUPEE SIGN*/ { 0x0E3F, 0x0000}, /*0E3F; L; [14 44, 03, 03] # [0832.0020.0002] # THAI CURRENCY SYMBOL BAHT*/ { 0x17DB, 0x0000}, /*17DB; L; [14 46, 03, 03] # [0833.0020.0002] # KHMER CURRENCY SYMBOL RIEL*/ { 0x20A0, 0x0000}, /*20A0; L; [14 48, 03, 03] # [0834.0020.0002] # EURO-CURRENCY SIGN*/ { 0x20A1, 0x0000}, /*20A1; L; [14 4A, 03, 03] # [0835.0020.0002] # COLON SIGN*/ { 0x20A2, 0x0000}, /*20A2; L; [14 4C, 03, 03] # [0836.0020.0002] # CRUZEIRO SIGN*/ { 0x20A3, 0x0000}, /*20A3; L; [14 4E, 03, 03] # [0837.0020.0002] # FRENCH FRANC SIGN*/ { 0x20A4, 0x0000}, /*20A4; L; [14 50, 03, 03] # [0838.0020.0002] # LIRA SIGN*/ { 0x20A5, 0x0000}, /*20A5; L; [14 52, 03, 03] # [0839.0020.0002] # MILL SIGN*/ { 0x20A6, 0x0000}, /*20A6; L; [14 54, 03, 03] # [083A.0020.0002] # NAIRA SIGN*/ { 0x20A7, 0x0000}, /*20A7; L; [14 56, 03, 03] # [083B.0020.0002] # 
PESETA SIGN*/ { 0x20A9, 0x0000}, /*20A9; L; [14 58, 03, 03] # [083C.0020.0002] # WON SIGN*/ { 0xFFE6, 0x0000}, /*FFE6; L; [14 58, 03, 05] # [083C.0020.0003] # FULLWIDTH WON SIGN*/ { 0x20AA, 0x0000}, /*20AA; L; [14 5A, 03, 03] # [083D.0020.0002] # NEW SHEQEL SIGN*/ { 0x20AB, 0x0000}, /*20AB; L; [14 5C, 03, 03] # [083E.0020.0002] # DONG SIGN*/ { 0x20AC, 0x0000}, /*20AC; L; [14 5E, 03, 03] # [083F.0020.0002] # EURO SIGN*/ { 0x20AD, 0x0000}, /*20AD; L; [14 60, 03, 03] # [0840.0020.0002] # KIP SIGN*/ { 0x20AE, 0x0000}, /*20AE; L; [14 62, 03, 03] # [0841.0020.0002] # TUGRIK SIGN*/ { 0x20AF, 0x0000}, /*20AF; L; [14 64, 03, 03] # [0842.0020.0002] # DRACHMA SIGN*/ }; uint32_t i, j; UErrorCode status = U_ZERO_ERROR; Collator::EComparisonResult expectedResult = Collator::EQUAL; RuleBasedCollator *c = (RuleBasedCollator *)Collator::createInstance("en_US", status); if (U_FAILURE(status)) { errln ("Collator::createInstance() failed!"); return; } // Compare each currency symbol against all the // currency symbols, including itself for (i = 0; i < ARRAY_LENGTH(currency); i += 1) { for (j = 0; j < ARRAY_LENGTH(currency); j += 1) { UnicodeString source(currency[i], 1); UnicodeString target(currency[j], 1); if (i < j) { expectedResult = Collator::LESS; } else if ( i == j) { expectedResult = Collator::EQUAL; } else { expectedResult = Collator::GREATER; } Collator::EComparisonResult compareResult = c->compare(source, target); CollationKey sourceKey, targetKey; UErrorCode status = U_ZERO_ERROR; c->getCollationKey(source, sourceKey, status); if (U_FAILURE(status)) { errln("Couldn't get collationKey for source"); continue; } c->getCollationKey(target, targetKey, status); if (U_FAILURE(status)) { errln("Couldn't get collationKey for target"); continue; } Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey); reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult ); } } delete c; }
// Builds this collator from the rule string of an existing RuleBasedCollator:
// the base class is initialized with rbc.getRules(), and any error from that
// construction is reported through status.  The constructor body itself does
// nothing.
My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status) : RuleBasedCollator(rbc.getRules(), status) { }
// @bug 4060154 // // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" // void CollationRegressionTest::Test4060154(/* char* par */) { UErrorCode status = U_ZERO_ERROR; UnicodeString rules; rules += "&f < g, G < h, H < i, I < j, J"; rules += " & H < "; rules += (UChar)0x0131; rules += ", "; rules += (UChar)0x0130; rules += ", i, I"; RuleBasedCollator *c = NULL; c = new RuleBasedCollator(rules, status); if (c == NULL || U_FAILURE(status)) { errln("failure building collator."); delete c; return; } c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); /* String[] tertiary = { "A", "<", "B", "H", "<", "\u0131", "H", "<", "I", "\u0131", "<", "\u0130", "\u0130", "<", "i", "\u0130", ">", "H", }; */ static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0x41, 0}, {0x3c, 0}, {0x42, 0}, {0x48, 0}, {0x3c, 0}, {0x0131, 0}, {0x48, 0}, {0x3c, 0}, {0x49, 0}, {0x0131, 0}, {0x3c, 0}, {0x0130, 0}, {0x0130, 0}, {0x3c, 0}, {0x69, 0}, {0x0130, 0}, {0x3e, 0}, {0x48, 0} }; c->setStrength(Collator::TERTIARY); compareArray(*c, tertiary, ARRAY_LENGTH(tertiary)); /* String[] secondary = { "H", "<", "I", "\u0131", "=", "\u0130", }; */ static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0x48, 0}, {0x3c, 0}, {0x49, 0}, {0x0131, 0}, {0x3d, 0}, {0x0130, 0} }; c->setStrength(Collator::PRIMARY); compareArray(*c, secondary, ARRAY_LENGTH(secondary)); delete c; }