void CollationIteratorTest::TestConstructors() { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *coll = (RuleBasedCollator *)Collator::createInstance(status); if (coll == NULL || U_FAILURE(status)) { errln("Couldn't create a default collator."); return; } // testing protected constructor with character iterator as argument StringCharacterIterator chariter(test1); CollationElementIterator *iter1 = coll->createCollationElementIterator(chariter); if (U_FAILURE(status)) { errln("Couldn't create collation element iterator with character iterator."); return; } CollationElementIterator *iter2 = coll->createCollationElementIterator(test1); // initially the 2 collation element iterators should be the same if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2 || *iter2 != *iter1) { errln("CollationElementIterators constructed with the same string data should be the same at the start"); } assertEqual(*iter1, *iter2); delete iter1; delete iter2; // tests empty strings UnicodeString empty(""); iter1 = coll->createCollationElementIterator(empty); chariter.setText(empty); iter2 = coll->createCollationElementIterator(chariter); if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2 || *iter2 != *iter1) { errln("CollationElementIterators constructed with the same string data should be the same at the start"); } if (iter1->next(status) != (int32_t)UCOL_NULLORDER) { errln("Empty string should have no CEs."); } if (iter2->next(status) != (int32_t)UCOL_NULLORDER) { errln("Empty string should have no CEs."); } delete iter1; delete iter2; delete coll; }
// @bug 4101940 // void CollationRegressionTest::Test4101940(/* char* par */) { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = NULL; UnicodeString rules = "&9 < a < b"; UnicodeString nothing = ""; c = new RuleBasedCollator(rules, status); if (c == NULL || U_FAILURE(status)) { errln("Failed to create RuleBasedCollator"); delete c; return; } CollationElementIterator *i = c->createCollationElementIterator(nothing); i->reset(); if (i->next(status) != CollationElementIterator::NULLORDER) { errln("next did not return NULLORDER"); } delete i; delete c; }
// @bug 4054238 // // CollationElementIterator will not work correctly if the associated // Collator object's mode is changed // void CollationRegressionTest::Test4054238(/* char* par */) { const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0}; const UnicodeString test3(chars3); RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); // NOTE: The Java code uses en_us to create the CollationElementIterators // but I'm pretty sure that's wrong, so I've changed this to use c. UErrorCode status = U_ZERO_ERROR; c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); CollationElementIterator *i1 = c->createCollationElementIterator(test3); delete i1; delete c; }
void CollationIteratorTest::TestStrengthOrder() { int order = 0x0123ABCD; UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *coll = (RuleBasedCollator *)Collator::createInstance(status); if (coll == NULL || U_FAILURE(status)) { errln("Couldn't create a default collator."); return; } coll->setStrength(Collator::PRIMARY); CollationElementIterator *iter = coll->createCollationElementIterator(test1); if (iter == NULL) { errln("Couldn't create a collation element iterator from default collator"); return; } if (iter->strengthOrder(order) != 0x01230000) { errln("Strength order for a primary strength collator should be the first 2 bytes"); return; } coll->setStrength(Collator::SECONDARY); if (iter->strengthOrder(order) != 0x0123AB00) { errln("Strength order for a secondary strength collator should be the third byte"); return; } coll->setStrength(Collator::TERTIARY); if (iter->strengthOrder(order) != order) { errln("Strength order for a tertiary strength collator should be the third byte"); return; } delete iter; delete coll; }
void CollationIteratorTest::TestMaxExpansion(/* char* par */) { UErrorCode status = U_ZERO_ERROR; UnicodeString rule("&a < ab < c/aba < d < z < ch"); RuleBasedCollator *coll = new RuleBasedCollator(rule, status); UChar ch = 0; UnicodeString str(ch); CollationElementIterator *iter = coll->createCollationElementIterator(str); while (ch < 0xFFFF && U_SUCCESS(status)) { int count = 1; uint32_t order; ch ++; UnicodeString str(ch); iter->setText(str, status); order = iter->previous(status); /* thai management */ if (CollationElementIterator::isIgnorable(order)) order = iter->previous(status); while (U_SUCCESS(status) && iter->previous(status) != (int32_t)UCOL_NULLORDER) { count ++; } if (U_FAILURE(status) && iter->getMaxExpansion(order) < count) { errln("Failure at codepoint %d, maximum expansion count < %d\n", ch, count); } } delete iter; delete coll; }
void SSearchTest::offsetTest() { const char *test[] = { // The sequence \u0FB3\u0F71\u0F71\u0F80 contains a discontiguous // contraction (\u0FB3\u0F71\u0F80) logically followed by \u0F71. "\\u1E33\\u0FB3\\u0F71\\u0F71\\u0F80\\uD835\\uDF6C\\u01B0", "\\ua191\\u16ef\\u2036\\u017a", #if 0 // This results in a complex interaction between contraction, // expansion and normalization that confuses the backwards offset fixups. "\\u0F7F\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85", #endif "\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85", "\\u07E9\\u07EA\\u07F1\\u07F2\\u07F3", "\\u02FE\\u02FF" "\\u0300\\u0301\\u0302\\u0303\\u0304\\u0305\\u0306\\u0307\\u0308\\u0309\\u030A\\u030B\\u030C\\u030D\\u030E\\u030F" "\\u0310\\u0311\\u0312\\u0313\\u0314\\u0315\\u0316\\u0317\\u0318\\u0319\\u031A\\u031B\\u031C\\u031D\\u031E\\u031F" "\\u0320\\u0321\\u0322\\u0323\\u0324\\u0325\\u0326\\u0327\\u0328\\u0329\\u032A\\u032B\\u032C\\u032D\\u032E\\u032F" "\\u0330\\u0331\\u0332\\u0333\\u0334\\u0335\\u0336\\u0337\\u0338\\u0339\\u033A\\u033B\\u033C\\u033D\\u033E\\u033F" "\\u0340\\u0341\\u0342\\u0343\\u0344\\u0345\\u0346\\u0347\\u0348\\u0349\\u034A\\u034B\\u034C\\u034D\\u034E", // currently not working, see #8081 "\\u02FE\\u02FF\\u0300\\u0301\\u0302\\u0303\\u0316\\u0317\\u0318", // currently not working, see #8081 "a\\u02FF\\u0301\\u0316", // currently not working, see #8081 "a\\u02FF\\u0316\\u0301", "a\\u0430\\u0301\\u0316", "a\\u0430\\u0316\\u0301", "abc\\u0E41\\u0301\\u0316", "abc\\u0E41\\u0316\\u0301", "\\u0E41\\u0301\\u0316", "\\u0E41\\u0316\\u0301", "a\\u0301\\u0316", "a\\u0316\\u0301", "\\uAC52\\uAC53", "\\u34CA\\u34CB", "\\u11ED\\u11EE", "\\u30C3\\u30D0", "p\\u00E9ch\\u00E9", "a\\u0301\\u0325", "a\\u0300\\u0325", "a\\u0325\\u0300", "A\\u0323\\u0300B", "A\\u0300\\u0323B", "A\\u0301\\u0323B", "A\\u0302\\u0301\\u0323B", "abc", "ab\\u0300c", "ab\\u0300\\u0323c", " \\uD800\\uDC00\\uDC00", "a\\uD800\\uDC00\\uDC00", "A\\u0301\\u0301", "A\\u0301\\u0323", "A\\u0301\\u0323B", "B\\u0301\\u0323C", "A\\u0300\\u0323B", "\\u0301A\\u0301\\u0301", "abcd\\r\\u0301", "p\\u00EAche", "pe\\u0302che", }; int32_t testCount = ARRAY_SIZE(test); UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *col = (RuleBasedCollator *) Collator::createInstance(Locale::getEnglish(), status); if (U_FAILURE(status)) { errcheckln(status, "Failed to create collator in offsetTest! - %s", u_errorName(status)); return; } char buffer[4096]; // A bit of a hack... just happens to be long enough for all the test cases... // We could allocate one that's the right size by (CE_count * 10) + 2 // 10 chars is enough room for 8 hex digits plus ", ". 2 extra chars for "[" and "]" col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); for(int32_t i = 0; i < testCount; i += 1) { if (i>=4 && i<=6 && logKnownIssue("9156", "was 8081")) { continue; // timebomb until ticket #9156 (was #8081) is resolved } UnicodeString ts = CharsToUnicodeString(test[i]); CollationElementIterator *iter = col->createCollationElementIterator(ts); OrderList forwardList; OrderList backwardList; int32_t order, low, high; do { low = iter->getOffset(); order = iter->next(status); high = iter->getOffset(); forwardList.add(order, low, high); } while (order != CollationElementIterator::NULLORDER); iter->reset(); iter->setOffset(ts.length(), status); backwardList.add(CollationElementIterator::NULLORDER, iter->getOffset(), iter->getOffset()); do { high = iter->getOffset(); order = iter->previous(status); low = iter->getOffset(); if (order == CollationElementIterator::NULLORDER) { break; } backwardList.add(order, low, high); } while (TRUE); backwardList.reverse(); if (forwardList.compare(backwardList)) { logln("Works with \"%s\"", test[i]); logln("Forward offsets: [%s]", printOffsets(buffer, forwardList)); // logln("Backward offsets: [%s]", printOffsets(buffer, backwardList)); logln("Forward CEs: [%s]", printOrders(buffer, forwardList)); // logln("Backward CEs: [%s]", printOrders(buffer, backwardList)); logln(); } else { errln("Fails with \"%s\"", test[i]); infoln("Forward offsets: [%s]", printOffsets(buffer, forwardList)); infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList)); infoln("Forward CEs: [%s]", printOrders(buffer, forwardList)); infoln("Backward CEs: [%s]", printOrders(buffer, backwardList)); infoln(); } delete iter; } delete col; }
void CollationIteratorTest::TestAssignment() { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *coll = (RuleBasedCollator *)Collator::createInstance(status); if (coll == NULL || U_FAILURE(status)) { errln("Couldn't create a default collator."); return; } UnicodeString source("abcd"); CollationElementIterator *iter1 = coll->createCollationElementIterator(source); CollationElementIterator iter2 = *iter1; if (*iter1 != iter2) { errln("Fail collation iterator assignment does not produce the same elements"); } CollationElementIterator iter3(*iter1); if (*iter1 != iter3) { errln("Fail collation iterator copy constructor does not produce the same elements"); } source = CharsToUnicodeString("a\\u0300\\u0325"); coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); CollationElementIterator *iter4 = coll->createCollationElementIterator(source); CollationElementIterator iter5(*iter4); if (*iter4 != iter5) { errln("collation iterator assignment does not produce the same elements"); } iter4->next(status); if (U_FAILURE(status) || *iter4 == iter5) { errln("collation iterator not equal"); } iter5.next(status); if (U_FAILURE(status) || *iter4 != iter5) { errln("collation iterator equal"); } iter4->next(status); if (U_FAILURE(status) || *iter4 == iter5) { errln("collation iterator not equal"); } iter5.next(status); if (U_FAILURE(status) || *iter4 != iter5) { errln("collation iterator equal"); } CollationElementIterator iter6(*iter4); if (*iter4 != iter6) { errln("collation iterator equal"); } iter4->next(status); if (U_FAILURE(status) || *iter4 == iter5) { errln("collation iterator not equal"); } iter5.next(status); if (U_FAILURE(status) || *iter4 != iter5) { errln("collation iterator equal"); } iter4->next(status); if (U_FAILURE(status) || *iter4 == iter5) { errln("collation iterator not equal"); } iter5.next(status); if (U_FAILURE(status) || *iter4 != iter5) { errln("collation iterator equal"); } delete iter1; delete iter4; delete coll; }
void CollationIteratorTest::TestClearBuffers(/* char* par */) { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = new RuleBasedCollator((UnicodeString)"&a < b < c & ab = d", status); if (c == NULL || U_FAILURE(status)) { errln("Couldn't create a RuleBasedCollator."); delete c; return; } UnicodeString source("abcd"); CollationElementIterator *i = c->createCollationElementIterator(source); int32_t e0 = i->next(status); // save the first collation element if (U_FAILURE(status)) { errln("call to i->next() failed. err=%s", u_errorName(status)); } else { i->setOffset(3, status); // go to the expanding character if (U_FAILURE(status)) { errln("call to i->setOffset(3) failed. err=%s", u_errorName(status)); } else { i->next(status); // but only use up half of it if (U_FAILURE(status)) { errln("call to i->next() failed. err=%s", u_errorName(status)); } else { i->setOffset(0, status); // go back to the beginning if (U_FAILURE(status)) { errln("call to i->setOffset(0) failed. err=%s", u_errorName(status)); } else { int32_t e = i->next(status); // and get this one again if (U_FAILURE(status)) { errln("call to i->next() failed. err=%s", u_errorName(status)); } else if (e != e0) { errln("got 0x%X, expected 0x%X", e, e0); } } } } } delete i; delete c; }