/**
 * Quick-checks s by delegating to norm2 for every stretch of s that
 * set.span() selects with USET_SPAN_SIMPLE; the stretches selected with
 * USET_SPAN_NOT_CONTAINED in between are skipped without being checked.
 *
 * @param s         the string to check
 * @param errorCode in/out ICU error code; if it already indicates a failure
 *                  after uprv_checkCanGetBuffer(), UNORM_MAYBE is returned
 * @return UNORM_YES if every checked stretch yields UNORM_YES,
 *         UNORM_MAYBE if at least one yields UNORM_MAYBE and none UNORM_NO;
 *         as soon as a stretch yields UNORM_NO or norm2 sets a failure code,
 *         that stretch's result is returned as is
 */
UNormalizationCheckResult
FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if(U_FAILURE(errorCode)) {
        return UNORM_MAYBE;
    }

    UNormalizationCheckResult overall=UNORM_YES;
    // The walk starts with a USET_SPAN_SIMPLE stretch and then alternates.
    bool insideFilter=true;
    int32_t start=0;
    while(start<s.length()) {
        const USetSpanCondition condition=
            insideFilter ? USET_SPAN_SIMPLE : USET_SPAN_NOT_CONTAINED;
        const int32_t limit=set.span(s, start, condition);
        if(insideFilter) {
            const UNormalizationCheckResult segment=
                norm2.quickCheck(s.tempSubStringBetween(start, limit), errorCode);
            if(U_FAILURE(errorCode) || segment==UNORM_NO) {
                // Nothing later in the string can improve on this; stop here.
                return segment;
            }
            if(segment==UNORM_MAYBE) {
                overall=UNORM_MAYBE;
            }
        }
        insideFilter=!insideFilter;
        start=limit;
    }
    return overall;
}
/**
 * Returns the end index of the leading part of s for which the quick check
 * is "yes". Stretches that set.span() selects with USET_SPAN_SIMPLE are
 * handed to norm2.spanQuickCheckYes(); stretches selected with
 * USET_SPAN_NOT_CONTAINED are stepped over unexamined.
 *
 * @param s         the string to scan
 * @param errorCode in/out ICU error code; if it already indicates a failure
 *                  after uprv_checkCanGetBuffer(), 0 is returned
 * @return the index (relative to the start of s) at which norm2 stopped
 *         inside a checked stretch, or s.length() if every checked stretch
 *         was accepted in full
 */
int32_t
FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if(U_FAILURE(errorCode)) {
        return 0;
    }

    USetSpanCondition condition=USET_SPAN_SIMPLE;
    int32_t start=0;
    while(start<s.length()) {
        const int32_t limit=set.span(s, start, condition);
        if(condition==USET_SPAN_NOT_CONTAINED) {
            // Skipped stretch: just move on to the next checked stretch.
            condition=USET_SPAN_SIMPLE;
            start=limit;
            continue;
        }

        // norm2 reports an index relative to the substring; rebase it onto s.
        const int32_t relativeYes=
            norm2.spanQuickCheckYes(s.tempSubStringBetween(start, limit), errorCode);
        const int32_t absoluteYes=start+relativeYes;
        if(U_FAILURE(errorCode) || absoluteYes<limit) {
            return absoluteYes;
        }
        condition=USET_SPAN_NOT_CONTAINED;
        start=limit;
    }
    return s.length();
}
/**
 * Tests whether s is normalized according to norm2, looking only at the
 * stretches of s that set.span() selects with USET_SPAN_SIMPLE. Stretches
 * selected with USET_SPAN_NOT_CONTAINED are not examined.
 *
 * @param s         the string to test
 * @param errorCode in/out ICU error code
 * @return FALSE if errorCode indicates a failure (on entry or after a call
 *         to norm2) or if norm2 rejects any examined stretch; TRUE otherwise
 */
UBool
FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }

    USetSpanCondition condition = USET_SPAN_SIMPLE;
    int32_t start = 0;
    while (start < s.length()) {
        const int32_t limit = set.span(s, start, condition);
        if (condition != USET_SPAN_NOT_CONTAINED) {
            // Ask norm2 first, then look at the error code it may have set.
            const UBool segmentNormalized =
                norm2.isNormalized(s.tempSubStringBetween(start, limit), errorCode);
            if (!segmentNormalized || U_FAILURE(errorCode)) {
                return FALSE;
            }
            condition = USET_SPAN_NOT_CONTAINED;
        } else {
            condition = USET_SPAN_SIMPLE;
        }
        start = limit;
    }
    return TRUE;
}
void CollationRegressionTest::Test4179216() { // you can position a CollationElementIterator in the middle of // a contracting character sequence, yielding a bogus collation // element IcuTestErrorCode errorCode(*this, "Test4179216"); RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode); UnicodeString testText = "church church catcatcher runcrunchynchy"; CollationElementIterator *iter = coll.createCollationElementIterator(testText); // test that the "ch" combination works properly iter->setOffset(4, errorCode); int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode)); iter->reset(); int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode)); iter->setOffset(5, errorCode); int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode)); // Compares and prints only 16-bit primary weights. if (elt4 != elt0 || elt5 != elt0) { errln("The collation elements at positions 0 (0x%04x), " "4 (0x%04x), and 5 (0x%04x) don't match.", elt0, elt4, elt5); } // test that the "cat" combination works properly iter->setOffset(14, errorCode); int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode)); iter->setOffset(15, errorCode); int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode)); iter->setOffset(16, errorCode); int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode)); iter->setOffset(17, errorCode); int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode)); iter->setOffset(18, errorCode); int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode)); iter->setOffset(19, errorCode); int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode)); // Compares and prints only 16-bit primary weights. 
if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17 || elt14 != elt18 || elt14 != elt19) { errln("\"cat\" elements don't match: elt14 = 0x%04x, " "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, " "elt18 = 0x%04x, elt19 = 0x%04x", elt14, elt15, elt16, elt17, elt18, elt19); } // now generate a complete list of the collation elements, // first using next() and then using setOffset(), and // make sure both interfaces return the same set of elements iter->reset(); int32_t elt = iter->next(errorCode); int32_t count = 0; while (elt != CollationElementIterator::NULLORDER) { ++count; elt = iter->next(errorCode); } LocalArray<UnicodeString> nextElements(new UnicodeString[count]); LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]); int32_t lastPos = 0; iter->reset(); elt = iter->next(errorCode); count = 0; while (elt != CollationElementIterator::NULLORDER) { nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset()); lastPos = iter->getOffset(); elt = iter->next(errorCode); } int32_t nextElementsLength = count; count = 0; for (int32_t i = 0; i < testText.length(); ) { iter->setOffset(i, errorCode); lastPos = iter->getOffset(); elt = iter->next(errorCode); setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset()); i = iter->getOffset(); } for (int32_t i = 0; i < nextElementsLength; i++) { if (nextElements[i] == setOffsetElements[i]) { logln(nextElements[i]); } else { errln(UnicodeString("Error: next() yielded ") + nextElements[i] + ", but setOffset() yielded " + setOffsetElements[i]); } } delete iter; }