/**
 * Walks two collation element iterators forward in lockstep and reports the
 * first position at which the elements returned by next() differ.
 *
 * The walk stops at the first mismatch (after logging it through errln) or
 * once the first iterator returns NULLORDER while both still agree.
 * The UErrorCode passed to next() is not inspected here.
 *
 * @param i1 first iterator; advanced by this call
 * @param i2 second iterator; advanced by this call
 */
void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
{
    UErrorCode status = U_ZERO_ERROR;

    for (int32_t position = 0; ; position += 1) {
        const int32_t left = i1.next(status);
        const int32_t right = i2.next(status);

        if (left != right) {
            // Build " <position>: strength(0x........) != strength(0x........)"
            UnicodeString lead(" ");
            UnicodeString report;

            report += lead + position;
            report += ": strength(0x";
            appendHex(left, 8, report);
            report += ") != strength(0x";
            appendHex(right, 8, report);
            report += ")";

            errln(report);
            return;
        }

        if (left == CollationElementIterator::NULLORDER) {
            // Both iterators agreed all the way to the end marker.
            return;
        }
    }
}
// @bug 4101940 // void CollationRegressionTest::Test4101940(/* char* par */) { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = NULL; UnicodeString rules = "&9 < a < b"; UnicodeString nothing = ""; c = new RuleBasedCollator(rules, status); if (c == NULL || U_FAILURE(status)) { errln("Failed to create RuleBasedCollator"); delete c; return; } CollationElementIterator *i = c->createCollationElementIterator(nothing); i->reset(); if (i->next(status) != CollationElementIterator::NULLORDER) { errln("next did not return NULLORDER"); } delete i; delete c; }
/**
 * Advances both iterators together with next() and logs an error for the
 * first pair of collation elements that differ.
 *
 * Iteration ends at the first mismatch, or when the elements are equal and
 * the first iterator has returned NULLORDER. The status passed to next() is
 * not checked by this helper.
 *
 * @param i1 first iterator; advanced by this call
 * @param i2 second iterator; advanced by this call
 */
void CollationIteratorTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t matched = 0;   // number of element pairs that compared equal so far

    int32_t first = i1.next(status);
    int32_t second = i2.next(status);

    while (first == second && first != CollationElementIterator::NULLORDER) {
        matched += 1;
        first = i1.next(status);
        second = i2.next(status);
    }

    if (first != second) {
        errln(" %d: strength(0x%X) != strength(0x%X)", matched, first, second);
    }
}
void CollationIteratorTest::TestOffset(/* char* par */) { CollationElementIterator *iter = en_us->createCollationElementIterator(test1); UErrorCode status = U_ZERO_ERROR; // testing boundaries iter->setOffset(0, status); if (U_FAILURE(status) || iter->previous(status) != UCOL_NULLORDER) { errln("Error: After setting offset to 0, we should be at the end " "of the backwards iteration"); } iter->setOffset(test1.length(), status); if (U_FAILURE(status) || iter->next(status) != UCOL_NULLORDER) { errln("Error: After setting offset to end of the string, we should " "be at the end of the backwards iteration"); } // Run all the way through the iterator, then get the offset int32_t orderLength = 0; Order *orders = getOrders(*iter, orderLength); int32_t offset = iter->getOffset(); if (offset != test1.length()) { UnicodeString msg1("offset at end != length: "); UnicodeString msg2(" vs "); errln(msg1 + offset + msg2 + test1.length()); } // Now set the offset back to the beginning and see if it works CollationElementIterator *pristine = en_us->createCollationElementIterator(test1); iter->setOffset(0, status); if (U_FAILURE(status)) { errln("setOffset failed."); } else { assertEqual(*iter, *pristine); } // TODO: try iterating halfway through a messy string. delete pristine; delete[] orders; delete iter; }
void SSearchTest::offsetTest() { const char *test[] = { // The sequence \u0FB3\u0F71\u0F71\u0F80 contains a discontiguous // contraction (\u0FB3\u0F71\u0F80) logically followed by \u0F71. "\\u1E33\\u0FB3\\u0F71\\u0F71\\u0F80\\uD835\\uDF6C\\u01B0", "\\ua191\\u16ef\\u2036\\u017a", #if 0 // This results in a complex interaction between contraction, // expansion and normalization that confuses the backwards offset fixups. "\\u0F7F\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85", #endif "\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85", "\\u07E9\\u07EA\\u07F1\\u07F2\\u07F3", "\\u02FE\\u02FF" "\\u0300\\u0301\\u0302\\u0303\\u0304\\u0305\\u0306\\u0307\\u0308\\u0309\\u030A\\u030B\\u030C\\u030D\\u030E\\u030F" "\\u0310\\u0311\\u0312\\u0313\\u0314\\u0315\\u0316\\u0317\\u0318\\u0319\\u031A\\u031B\\u031C\\u031D\\u031E\\u031F" "\\u0320\\u0321\\u0322\\u0323\\u0324\\u0325\\u0326\\u0327\\u0328\\u0329\\u032A\\u032B\\u032C\\u032D\\u032E\\u032F" "\\u0330\\u0331\\u0332\\u0333\\u0334\\u0335\\u0336\\u0337\\u0338\\u0339\\u033A\\u033B\\u033C\\u033D\\u033E\\u033F" "\\u0340\\u0341\\u0342\\u0343\\u0344\\u0345\\u0346\\u0347\\u0348\\u0349\\u034A\\u034B\\u034C\\u034D\\u034E", // currently not working, see #8081 "\\u02FE\\u02FF\\u0300\\u0301\\u0302\\u0303\\u0316\\u0317\\u0318", // currently not working, see #8081 "a\\u02FF\\u0301\\u0316", // currently not working, see #8081 "a\\u02FF\\u0316\\u0301", "a\\u0430\\u0301\\u0316", "a\\u0430\\u0316\\u0301", "abc\\u0E41\\u0301\\u0316", "abc\\u0E41\\u0316\\u0301", "\\u0E41\\u0301\\u0316", "\\u0E41\\u0316\\u0301", "a\\u0301\\u0316", "a\\u0316\\u0301", "\\uAC52\\uAC53", "\\u34CA\\u34CB", "\\u11ED\\u11EE", "\\u30C3\\u30D0", "p\\u00E9ch\\u00E9", "a\\u0301\\u0325", "a\\u0300\\u0325", "a\\u0325\\u0300", "A\\u0323\\u0300B", "A\\u0300\\u0323B", "A\\u0301\\u0323B", "A\\u0302\\u0301\\u0323B", "abc", "ab\\u0300c", "ab\\u0300\\u0323c", " \\uD800\\uDC00\\uDC00", "a\\uD800\\uDC00\\uDC00", "A\\u0301\\u0301", "A\\u0301\\u0323", "A\\u0301\\u0323B", "B\\u0301\\u0323C", 
"A\\u0300\\u0323B", "\\u0301A\\u0301\\u0301", "abcd\\r\\u0301", "p\\u00EAche", "pe\\u0302che", }; int32_t testCount = ARRAY_SIZE(test); UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *col = (RuleBasedCollator *) Collator::createInstance(Locale::getEnglish(), status); if (U_FAILURE(status)) { errcheckln(status, "Failed to create collator in offsetTest! - %s", u_errorName(status)); return; } char buffer[4096]; // A bit of a hack... just happens to be long enough for all the test cases... // We could allocate one that's the right size by (CE_count * 10) + 2 // 10 chars is enough room for 8 hex digits plus ", ". 2 extra chars for "[" and "]" col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); for(int32_t i = 0; i < testCount; i += 1) { if (i>=4 && i<=6 && logKnownIssue("9156", "was 8081")) { continue; // timebomb until ticket #9156 (was #8081) is resolved } UnicodeString ts = CharsToUnicodeString(test[i]); CollationElementIterator *iter = col->createCollationElementIterator(ts); OrderList forwardList; OrderList backwardList; int32_t order, low, high; do { low = iter->getOffset(); order = iter->next(status); high = iter->getOffset(); forwardList.add(order, low, high); } while (order != CollationElementIterator::NULLORDER); iter->reset(); iter->setOffset(ts.length(), status); backwardList.add(CollationElementIterator::NULLORDER, iter->getOffset(), iter->getOffset()); do { high = iter->getOffset(); order = iter->previous(status); low = iter->getOffset(); if (order == CollationElementIterator::NULLORDER) { break; } backwardList.add(order, low, high); } while (TRUE); backwardList.reverse(); if (forwardList.compare(backwardList)) { logln("Works with \"%s\"", test[i]); logln("Forward offsets: [%s]", printOffsets(buffer, forwardList)); // logln("Backward offsets: [%s]", printOffsets(buffer, backwardList)); logln("Forward CEs: [%s]", printOrders(buffer, forwardList)); // logln("Backward CEs: [%s]", printOrders(buffer, backwardList)); logln(); } else { errln("Fails 
with \"%s\"", test[i]); infoln("Forward offsets: [%s]", printOffsets(buffer, forwardList)); infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList)); infoln("Forward CEs: [%s]", printOrders(buffer, forwardList)); infoln("Backward CEs: [%s]", printOrders(buffer, backwardList)); infoln(); } delete iter; } delete col; }
/**
 * Verifies that iterating backwards over a collation element iterator yields
 * the same elements as iterating forwards.
 *
 * The forward elements are collected with getOrders(); the iterator is then
 * reset and walked with previous(). Elements whose order is 0 are allowed to
 * appear on one side only and are skipped when realigning. On a mismatch an
 * error is logged and the function returns. If the backward walk does not
 * consume all forward elements, both the forward and backward sequences are
 * dumped through err/errln.
 *
 * @param iter iterator to check; its position is changed by this call
 */
void IntlTestCollator::backAndForth(CollationElementIterator &iter)
{
    // Run through the iterator forwards and stick it into an array
    int32_t orderLength = 0;
    LocalArray<Order> orders(getOrders(iter, orderLength));
    UErrorCode status = U_ZERO_ERROR;

    // Now go through it backwards and make sure we get the same values
    int32_t index = orderLength;
    int32_t o;

    // reset the iterator
    iter.reset();

    while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER) {
        // The offset is only compared when TEST_OFFSETS is enabled. Its
        // declaration used to be commented out, so the guarded check below
        // referred to an undeclared name; declare it under the same condition.
#if TEST_OFFSETS
        const int32_t offset = iter.getOffset();
#else
        iter.getOffset();
#endif

        if (index == 0) {
            if (o == 0) {
                // Extra zero-order element after the forward list is used up: tolerated.
                continue;
            } else {
                // this is an error, orders exhausted but there are non-ignorable CEs from
                // going backwards
                errln("Backward iteration returned a non ignorable after orders are exhausted");
                break;
            }
        }

        index -= 1;
        if (o != orders[index].order) {
            if (o == 0) {
                // Zero-order element seen only in the backward direction: stay put.
                index += 1;
            } else {
                // Skip zero-order elements seen only in the forward direction.
                while (index > 0 && orders[--index].order == 0) {
                    // nothing...
                }

                if (o != orders[index].order) {
                    // "%0:8X" is not a valid printf conversion; "%08X" prints
                    // the intended zero-padded 8-digit hex value.
                    errln("Mismatched order at index %d: 0x%08X vs. 0x%08X", index,
                          orders[index].order, o);
                    //break;
                    return;
                }
            }
        }

#if TEST_OFFSETS
        if (offset != orders[index].offset) {
            errln("Mismatched offset at index %d: %d vs. %d", index,
                  orders[index].offset, offset);
            //break;
            return;
        }
#endif
    }

    // Leading zero-order elements in the forward list need not be matched.
    while (index != 0 && orders[index - 1].order == 0) {
        index--;
    }

    if (index != 0) {
        UnicodeString msg("Didn't get back to beginning - index is ");
        errln(msg + index);

        // Dump both directions to help diagnose the failure.
        iter.reset();
        err("next: ");
        while ((o = iter.next(status)) != CollationElementIterator::NULLORDER) {
            UnicodeString hexString("0x");

            appendHex(o, 8, hexString);
            hexString += " ";
            err(hexString);
        }
        errln("");

        err("prev: ");
        while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER) {
            UnicodeString hexString("0x");

            appendHex(o, 8, hexString);
            hexString += " ";
            err(hexString);
        }
        errln("");
    }
}
void CollationRegressionTest::Test4179216() { // you can position a CollationElementIterator in the middle of // a contracting character sequence, yielding a bogus collation // element IcuTestErrorCode errorCode(*this, "Test4179216"); RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode); UnicodeString testText = "church church catcatcher runcrunchynchy"; CollationElementIterator *iter = coll.createCollationElementIterator(testText); // test that the "ch" combination works properly iter->setOffset(4, errorCode); int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode)); iter->reset(); int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode)); iter->setOffset(5, errorCode); int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode)); // Compares and prints only 16-bit primary weights. if (elt4 != elt0 || elt5 != elt0) { errln("The collation elements at positions 0 (0x%04x), " "4 (0x%04x), and 5 (0x%04x) don't match.", elt0, elt4, elt5); } // test that the "cat" combination works properly iter->setOffset(14, errorCode); int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode)); iter->setOffset(15, errorCode); int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode)); iter->setOffset(16, errorCode); int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode)); iter->setOffset(17, errorCode); int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode)); iter->setOffset(18, errorCode); int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode)); iter->setOffset(19, errorCode); int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode)); // Compares and prints only 16-bit primary weights. 
if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17 || elt14 != elt18 || elt14 != elt19) { errln("\"cat\" elements don't match: elt14 = 0x%04x, " "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, " "elt18 = 0x%04x, elt19 = 0x%04x", elt14, elt15, elt16, elt17, elt18, elt19); } // now generate a complete list of the collation elements, // first using next() and then using setOffset(), and // make sure both interfaces return the same set of elements iter->reset(); int32_t elt = iter->next(errorCode); int32_t count = 0; while (elt != CollationElementIterator::NULLORDER) { ++count; elt = iter->next(errorCode); } LocalArray<UnicodeString> nextElements(new UnicodeString[count]); LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]); int32_t lastPos = 0; iter->reset(); elt = iter->next(errorCode); count = 0; while (elt != CollationElementIterator::NULLORDER) { nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset()); lastPos = iter->getOffset(); elt = iter->next(errorCode); } int32_t nextElementsLength = count; count = 0; for (int32_t i = 0; i < testText.length(); ) { iter->setOffset(i, errorCode); lastPos = iter->getOffset(); elt = iter->next(errorCode); setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset()); i = iter->getOffset(); } for (int32_t i = 0; i < nextElementsLength; i++) { if (nextElements[i] == setOffsetElements[i]) { logln(nextElements[i]); } else { errln(UnicodeString("Error: next() yielded ") + nextElements[i] + ", but setOffset() yielded " + setOffsetElements[i]); } } delete iter; }
void CollationIteratorTest::TestClearBuffers(/* char* par */) { UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *c = new RuleBasedCollator((UnicodeString)"&a < b < c & ab = d", status); if (c == NULL || U_FAILURE(status)) { errln("Couldn't create a RuleBasedCollator."); delete c; return; } UnicodeString source("abcd"); CollationElementIterator *i = c->createCollationElementIterator(source); int32_t e0 = i->next(status); // save the first collation element if (U_FAILURE(status)) { errln("call to i->next() failed. err=%s", u_errorName(status)); } else { i->setOffset(3, status); // go to the expanding character if (U_FAILURE(status)) { errln("call to i->setOffset(3) failed. err=%s", u_errorName(status)); } else { i->next(status); // but only use up half of it if (U_FAILURE(status)) { errln("call to i->next() failed. err=%s", u_errorName(status)); } else { i->setOffset(0, status); // go back to the beginning if (U_FAILURE(status)) { errln("call to i->setOffset(0) failed. err=%s", u_errorName(status)); } else { int32_t e = i->next(status); // and get this one again if (U_FAILURE(status)) { errln("call to i->next() failed. err=%s", u_errorName(status)); } else if (e != e0) { errln("got 0x%X, expected 0x%X", e, e0); } } } } } delete i; delete c; }