// @bug 4054734 // // Collator::IDENTICAL documented but not implemented // void CollationRegressionTest::Test4054734(/* char* par */) { /* Here's the original Java: String[] decomp = { "\u0001", "<", "\u0002", "\u0001", "=", "\u0001", "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise "\u00C0", "=", "A\u0300" // Decomp should make these equal }; String[] nodecomp = { "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave }; */ static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0x0001, 0}, {0x3c, 0}, {0x0002, 0}, {0x0001, 0}, {0x3d, 0}, {0x0001, 0}, {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0}, {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0} }; UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); c->setStrength(Collator::IDENTICAL); c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); compareArray(*c, decomp, UPRV_LENGTHOF(decomp)); delete c; }
void SSearchTest::offsetTest() { const char *test[] = { // The sequence \u0FB3\u0F71\u0F71\u0F80 contains a discontiguous // contraction (\u0FB3\u0F71\u0F80) logically followed by \u0F71. "\\u1E33\\u0FB3\\u0F71\\u0F71\\u0F80\\uD835\\uDF6C\\u01B0", "\\ua191\\u16ef\\u2036\\u017a", #if 0 // This results in a complex interaction between contraction, // expansion and normalization that confuses the backwards offset fixups. "\\u0F7F\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85", #endif "\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85", "\\u07E9\\u07EA\\u07F1\\u07F2\\u07F3", "\\u02FE\\u02FF" "\\u0300\\u0301\\u0302\\u0303\\u0304\\u0305\\u0306\\u0307\\u0308\\u0309\\u030A\\u030B\\u030C\\u030D\\u030E\\u030F" "\\u0310\\u0311\\u0312\\u0313\\u0314\\u0315\\u0316\\u0317\\u0318\\u0319\\u031A\\u031B\\u031C\\u031D\\u031E\\u031F" "\\u0320\\u0321\\u0322\\u0323\\u0324\\u0325\\u0326\\u0327\\u0328\\u0329\\u032A\\u032B\\u032C\\u032D\\u032E\\u032F" "\\u0330\\u0331\\u0332\\u0333\\u0334\\u0335\\u0336\\u0337\\u0338\\u0339\\u033A\\u033B\\u033C\\u033D\\u033E\\u033F" "\\u0340\\u0341\\u0342\\u0343\\u0344\\u0345\\u0346\\u0347\\u0348\\u0349\\u034A\\u034B\\u034C\\u034D\\u034E", // currently not working, see #8081 "\\u02FE\\u02FF\\u0300\\u0301\\u0302\\u0303\\u0316\\u0317\\u0318", // currently not working, see #8081 "a\\u02FF\\u0301\\u0316", // currently not working, see #8081 "a\\u02FF\\u0316\\u0301", "a\\u0430\\u0301\\u0316", "a\\u0430\\u0316\\u0301", "abc\\u0E41\\u0301\\u0316", "abc\\u0E41\\u0316\\u0301", "\\u0E41\\u0301\\u0316", "\\u0E41\\u0316\\u0301", "a\\u0301\\u0316", "a\\u0316\\u0301", "\\uAC52\\uAC53", "\\u34CA\\u34CB", "\\u11ED\\u11EE", "\\u30C3\\u30D0", "p\\u00E9ch\\u00E9", "a\\u0301\\u0325", "a\\u0300\\u0325", "a\\u0325\\u0300", "A\\u0323\\u0300B", "A\\u0300\\u0323B", "A\\u0301\\u0323B", "A\\u0302\\u0301\\u0323B", "abc", "ab\\u0300c", "ab\\u0300\\u0323c", " \\uD800\\uDC00\\uDC00", "a\\uD800\\uDC00\\uDC00", "A\\u0301\\u0301", "A\\u0301\\u0323", "A\\u0301\\u0323B", "B\\u0301\\u0323C", "A\\u0300\\u0323B", "\\u0301A\\u0301\\u0301", "abcd\\r\\u0301", "p\\u00EAche", "pe\\u0302che", }; int32_t testCount = ARRAY_SIZE(test); UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *col = (RuleBasedCollator *) Collator::createInstance(Locale::getEnglish(), status); if (U_FAILURE(status)) { errcheckln(status, "Failed to create collator in offsetTest! - %s", u_errorName(status)); return; } char buffer[4096]; // A bit of a hack... just happens to be long enough for all the test cases... // We could allocate one that's the right size by (CE_count * 10) + 2 // 10 chars is enough room for 8 hex digits plus ", ". 2 extra chars for "[" and "]" col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); for(int32_t i = 0; i < testCount; i += 1) { if (i>=4 && i<=6 && logKnownIssue("9156", "was 8081")) { continue; // timebomb until ticket #9156 (was #8081) is resolved } UnicodeString ts = CharsToUnicodeString(test[i]); CollationElementIterator *iter = col->createCollationElementIterator(ts); OrderList forwardList; OrderList backwardList; int32_t order, low, high; do { low = iter->getOffset(); order = iter->next(status); high = iter->getOffset(); forwardList.add(order, low, high); } while (order != CollationElementIterator::NULLORDER); iter->reset(); iter->setOffset(ts.length(), status); backwardList.add(CollationElementIterator::NULLORDER, iter->getOffset(), iter->getOffset()); do { high = iter->getOffset(); order = iter->previous(status); low = iter->getOffset(); if (order == CollationElementIterator::NULLORDER) { break; } backwardList.add(order, low, high); } while (TRUE); backwardList.reverse(); if (forwardList.compare(backwardList)) { logln("Works with \"%s\"", test[i]); logln("Forward offsets: [%s]", printOffsets(buffer, forwardList)); // logln("Backward offsets: [%s]", printOffsets(buffer, backwardList)); logln("Forward CEs: [%s]", printOrders(buffer, forwardList)); // logln("Backward CEs: [%s]", printOrders(buffer, backwardList)); logln(); } else { errln("Fails with \"%s\"", test[i]); infoln("Forward offsets: [%s]", printOffsets(buffer, forwardList)); infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList)); infoln("Forward CEs: [%s]", printOrders(buffer, forwardList)); infoln("Backward CEs: [%s]", printOrders(buffer, backwardList)); infoln(); } delete iter; } delete col; }
// @bug 4060154 // // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" // void CollationRegressionTest::Test4060154(/* char* par */) { UErrorCode status = U_ZERO_ERROR; UnicodeString rules; rules += "&f < g, G < h, H < i, I < j, J"; rules += " & H < "; rules += (UChar)0x0131; rules += ", "; rules += (UChar)0x0130; rules += ", i, I"; RuleBasedCollator *c = NULL; c = new RuleBasedCollator(rules, status); if (c == NULL || U_FAILURE(status)) { errln("failure building collator."); delete c; return; } c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); /* String[] tertiary = { "A", "<", "B", "H", "<", "\u0131", "H", "<", "I", "\u0131", "<", "\u0130", "\u0130", "<", "i", "\u0130", ">", "H", }; */ static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0x41, 0}, {0x3c, 0}, {0x42, 0}, {0x48, 0}, {0x3c, 0}, {0x0131, 0}, {0x48, 0}, {0x3c, 0}, {0x49, 0}, {0x0131, 0}, {0x3c, 0}, {0x0130, 0}, {0x0130, 0}, {0x3c, 0}, {0x69, 0}, {0x0130, 0}, {0x3e, 0}, {0x48, 0} }; c->setStrength(Collator::TERTIARY); compareArray(*c, tertiary, ARRAY_LENGTH(tertiary)); /* String[] secondary = { "H", "<", "I", "\u0131", "=", "\u0130", }; */ static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] = { {0x48, 0}, {0x3c, 0}, {0x49, 0}, {0x0131, 0}, {0x3d, 0}, {0x0130, 0} }; c->setStrength(Collator::PRIMARY); compareArray(*c, secondary, ARRAY_LENGTH(secondary)); delete c; }
void CollationIteratorTest::TestAssignment() { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *coll = (RuleBasedCollator *)Collator::createInstance(status); if (coll == NULL || U_FAILURE(status)) { errln("Couldn't create a default collator."); return; } UnicodeString source("abcd"); CollationElementIterator *iter1 = coll->createCollationElementIterator(source); CollationElementIterator iter2 = *iter1; if (*iter1 != iter2) { errln("Fail collation iterator assignment does not produce the same elements"); } CollationElementIterator iter3(*iter1); if (*iter1 != iter3) { errln("Fail collation iterator copy constructor does not produce the same elements"); } source = CharsToUnicodeString("a\\u0300\\u0325"); coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); CollationElementIterator *iter4 = coll->createCollationElementIterator(source); CollationElementIterator iter5(*iter4); if (*iter4 != iter5) { errln("collation iterator assignment does not produce the same elements"); } iter4->next(status); if (U_FAILURE(status) || *iter4 == iter5) { errln("collation iterator not equal"); } iter5.next(status); if (U_FAILURE(status) || *iter4 != iter5) { errln("collation iterator equal"); } iter4->next(status); if (U_FAILURE(status) || *iter4 == iter5) { errln("collation iterator not equal"); } iter5.next(status); if (U_FAILURE(status) || *iter4 != iter5) { errln("collation iterator equal"); } CollationElementIterator iter6(*iter4); if (*iter4 != iter6) { errln("collation iterator equal"); } iter4->next(status); if (U_FAILURE(status) || *iter4 == iter5) { errln("collation iterator not equal"); } iter5.next(status); if (U_FAILURE(status) || *iter4 != iter5) { errln("collation iterator equal"); } iter4->next(status); if (U_FAILURE(status) || *iter4 == iter5) { errln("collation iterator not equal"); } iter5.next(status); if (U_FAILURE(status) || *iter4 != iter5) { errln("collation iterator equal"); } delete iter1; delete iter4; delete coll; }