void CollationIteratorTest::TestOffset(/* char* par */) { CollationElementIterator *iter = en_us->createCollationElementIterator(test1); UErrorCode status = U_ZERO_ERROR; // testing boundaries iter->setOffset(0, status); if (U_FAILURE(status) || iter->previous(status) != UCOL_NULLORDER) { errln("Error: After setting offset to 0, we should be at the end " "of the backwards iteration"); } iter->setOffset(test1.length(), status); if (U_FAILURE(status) || iter->next(status) != UCOL_NULLORDER) { errln("Error: After setting offset to end of the string, we should " "be at the end of the backwards iteration"); } // Run all the way through the iterator, then get the offset int32_t orderLength = 0; Order *orders = getOrders(*iter, orderLength); int32_t offset = iter->getOffset(); if (offset != test1.length()) { UnicodeString msg1("offset at end != length: "); UnicodeString msg2(" vs "); errln(msg1 + offset + msg2 + test1.length()); } // Now set the offset back to the beginning and see if it works CollationElementIterator *pristine = en_us->createCollationElementIterator(test1); iter->setOffset(0, status); if (U_FAILURE(status)) { errln("setOffset failed."); } else { assertEqual(*iter, *pristine); } // TODO: try iterating halfway through a messy string. delete pristine; delete[] orders; delete iter; }
void CollationIteratorTest::TestMaxExpansion(/* char* par */) { UErrorCode status = U_ZERO_ERROR; UnicodeString rule("&a < ab < c/aba < d < z < ch"); RuleBasedCollator *coll = new RuleBasedCollator(rule, status); UChar ch = 0; UnicodeString str(ch); CollationElementIterator *iter = coll->createCollationElementIterator(str); while (ch < 0xFFFF && U_SUCCESS(status)) { int count = 1; uint32_t order; ch ++; UnicodeString str(ch); iter->setText(str, status); order = iter->previous(status); /* thai management */ if (CollationElementIterator::isIgnorable(order)) order = iter->previous(status); while (U_SUCCESS(status) && iter->previous(status) != (int32_t)UCOL_NULLORDER) { count ++; } if (U_FAILURE(status) && iter->getMaxExpansion(order) < count) { errln("Failure at codepoint %d, maximum expansion count < %d\n", ch, count); } } delete iter; delete coll; }
void SSearchTest::offsetTest() { const char *test[] = { // The sequence \u0FB3\u0F71\u0F71\u0F80 contains a discontiguous // contraction (\u0FB3\u0F71\u0F80) logically followed by \u0F71. "\\u1E33\\u0FB3\\u0F71\\u0F71\\u0F80\\uD835\\uDF6C\\u01B0", "\\ua191\\u16ef\\u2036\\u017a", #if 0 // This results in a complex interaction between contraction, // expansion and normalization that confuses the backwards offset fixups. "\\u0F7F\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85", #endif "\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85", "\\u07E9\\u07EA\\u07F1\\u07F2\\u07F3", "\\u02FE\\u02FF" "\\u0300\\u0301\\u0302\\u0303\\u0304\\u0305\\u0306\\u0307\\u0308\\u0309\\u030A\\u030B\\u030C\\u030D\\u030E\\u030F" "\\u0310\\u0311\\u0312\\u0313\\u0314\\u0315\\u0316\\u0317\\u0318\\u0319\\u031A\\u031B\\u031C\\u031D\\u031E\\u031F" "\\u0320\\u0321\\u0322\\u0323\\u0324\\u0325\\u0326\\u0327\\u0328\\u0329\\u032A\\u032B\\u032C\\u032D\\u032E\\u032F" "\\u0330\\u0331\\u0332\\u0333\\u0334\\u0335\\u0336\\u0337\\u0338\\u0339\\u033A\\u033B\\u033C\\u033D\\u033E\\u033F" "\\u0340\\u0341\\u0342\\u0343\\u0344\\u0345\\u0346\\u0347\\u0348\\u0349\\u034A\\u034B\\u034C\\u034D\\u034E", // currently not working, see #8081 "\\u02FE\\u02FF\\u0300\\u0301\\u0302\\u0303\\u0316\\u0317\\u0318", // currently not working, see #8081 "a\\u02FF\\u0301\\u0316", // currently not working, see #8081 "a\\u02FF\\u0316\\u0301", "a\\u0430\\u0301\\u0316", "a\\u0430\\u0316\\u0301", "abc\\u0E41\\u0301\\u0316", "abc\\u0E41\\u0316\\u0301", "\\u0E41\\u0301\\u0316", "\\u0E41\\u0316\\u0301", "a\\u0301\\u0316", "a\\u0316\\u0301", "\\uAC52\\uAC53", "\\u34CA\\u34CB", "\\u11ED\\u11EE", "\\u30C3\\u30D0", "p\\u00E9ch\\u00E9", "a\\u0301\\u0325", "a\\u0300\\u0325", "a\\u0325\\u0300", "A\\u0323\\u0300B", "A\\u0300\\u0323B", "A\\u0301\\u0323B", "A\\u0302\\u0301\\u0323B", "abc", "ab\\u0300c", "ab\\u0300\\u0323c", " \\uD800\\uDC00\\uDC00", "a\\uD800\\uDC00\\uDC00", "A\\u0301\\u0301", "A\\u0301\\u0323", "A\\u0301\\u0323B", "B\\u0301\\u0323C", 
"A\\u0300\\u0323B", "\\u0301A\\u0301\\u0301", "abcd\\r\\u0301", "p\\u00EAche", "pe\\u0302che", }; int32_t testCount = ARRAY_SIZE(test); UErrorCode status = U_ZERO_ERROR; RuleBasedCollator *col = (RuleBasedCollator *) Collator::createInstance(Locale::getEnglish(), status); if (U_FAILURE(status)) { errcheckln(status, "Failed to create collator in offsetTest! - %s", u_errorName(status)); return; } char buffer[4096]; // A bit of a hack... just happens to be long enough for all the test cases... // We could allocate one that's the right size by (CE_count * 10) + 2 // 10 chars is enough room for 8 hex digits plus ", ". 2 extra chars for "[" and "]" col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); for(int32_t i = 0; i < testCount; i += 1) { if (i>=4 && i<=6 && logKnownIssue("9156", "was 8081")) { continue; // timebomb until ticket #9156 (was #8081) is resolved } UnicodeString ts = CharsToUnicodeString(test[i]); CollationElementIterator *iter = col->createCollationElementIterator(ts); OrderList forwardList; OrderList backwardList; int32_t order, low, high; do { low = iter->getOffset(); order = iter->next(status); high = iter->getOffset(); forwardList.add(order, low, high); } while (order != CollationElementIterator::NULLORDER); iter->reset(); iter->setOffset(ts.length(), status); backwardList.add(CollationElementIterator::NULLORDER, iter->getOffset(), iter->getOffset()); do { high = iter->getOffset(); order = iter->previous(status); low = iter->getOffset(); if (order == CollationElementIterator::NULLORDER) { break; } backwardList.add(order, low, high); } while (TRUE); backwardList.reverse(); if (forwardList.compare(backwardList)) { logln("Works with \"%s\"", test[i]); logln("Forward offsets: [%s]", printOffsets(buffer, forwardList)); // logln("Backward offsets: [%s]", printOffsets(buffer, backwardList)); logln("Forward CEs: [%s]", printOrders(buffer, forwardList)); // logln("Backward CEs: [%s]", printOrders(buffer, backwardList)); logln(); } else { errln("Fails 
with \"%s\"", test[i]); infoln("Forward offsets: [%s]", printOffsets(buffer, forwardList)); infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList)); infoln("Forward CEs: [%s]", printOrders(buffer, forwardList)); infoln("Backward CEs: [%s]", printOrders(buffer, backwardList)); infoln(); } delete iter; } delete col; }
/**
 * Checks that iterating `iter` backwards yields the same collation elements
 * as iterating it forwards.
 *
 * The forward sequence is collected with getOrders(); the iterator is then
 * reset and walked with previous(), matching each element against the forward
 * array from its end. Zero-valued orders are tolerated on either side: a
 * backward 0 with no forward counterpart is skipped, and forward 0 entries are
 * skipped while looking for a match. On a mismatch an error is logged and the
 * function returns. If the backward walk does not consume the whole forward
 * array, both sequences are dumped.
 *
 * @param iter iterator to exercise; it is reset and consumed by this call.
 */
void IntlTestCollator::backAndForth(CollationElementIterator &iter)
{
    // Run through the iterator forwards and stick it into an array
    int32_t orderLength = 0;
    LocalArray<Order> orders(getOrders(iter, orderLength));
    UErrorCode status = U_ZERO_ERROR;

    // Now go through it backwards and make sure we get the same values
    int32_t index = orderLength;
    int32_t o;

    // reset the iterator
    iter.reset();

    while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER) {
        // Only consumed by the TEST_OFFSETS block below; declared
        // unconditionally so that enabling the macro still compiles.
        int32_t offset = iter.getOffset();
        (void)offset;

        if (index == 0) {
            if (o == 0) {
                continue;
            } else {
                // this is an error, orders exhausted but there are non-ignorable CEs from
                // going backwards
                errln("Backward iteration returned a non ignorable after orders are exhausted");
                break;
            }
        }

        index -= 1;
        if (o != orders[index].order) {
            if (o == 0) {
                // Extra backward 0: leave the forward position where it was.
                index += 1;
            } else {
                // Skip forward 0 entries until a candidate match is found.
                while (index > 0 && orders[--index].order == 0) {
                    // nothing...
                }

                if (o != orders[index].order) {
                    // "%08X" (not "%0:8X") is the valid printf spelling for
                    // eight zero-padded hex digits.
                    errln("Mismatched order at index %d: 0x%08X vs. 0x%08X", index,
                          static_cast<unsigned int>(orders[index].order),
                          static_cast<unsigned int>(o));
                    //break;
                    return;
                }
            }
        }

#if TEST_OFFSETS
        if (offset != orders[index].offset) {
            errln("Mismatched offset at index %d: %d vs. %d", index,
                  orders[index].offset, offset);
            //break;
            return;
        }
#endif
    }

    // Leading forward 0 entries need no backward counterpart.
    while (index != 0 && orders[index - 1].order == 0) {
        index--;
    }

    if (index != 0) {
        UnicodeString msg("Didn't get back to beginning - index is ");
        errln(msg + index);

        // Dump both directions to help diagnose the mismatch.
        iter.reset();
        err("next: ");
        while ((o = iter.next(status)) != CollationElementIterator::NULLORDER) {
            UnicodeString hexString("0x");

            appendHex(o, 8, hexString);
            hexString += " ";
            err(hexString);
        }
        errln("");

        err("prev: ");
        while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER) {
            UnicodeString hexString("0x");

            appendHex(o, 8, hexString);
            hexString += " ";
            err(hexString);
        }
        errln("");
    }
}