// Quick-checks s run by run. The filter set splits s into alternating runs:
// those spanned with USET_SPAN_SIMPLE are passed to norm2.quickCheck(), those
// spanned with USET_SPAN_NOT_CONTAINED are skipped.
// Returns UNORM_MAYBE if errorCode indicates a failure after
// uprv_checkCanGetBuffer(). Returns the wrapped normalizer's result immediately
// when a run yields UNORM_NO or leaves errorCode in a failure state. Otherwise
// returns UNORM_MAYBE if any run yielded UNORM_MAYBE, else UNORM_YES.
UNormalizationCheckResult
FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if(U_FAILURE(errorCode)) {
        return UNORM_MAYBE;
    }
    UNormalizationCheckResult overall=UNORM_YES;
    // The first run is always spanned with USET_SPAN_SIMPLE (it may be empty).
    USetSpanCondition mode=USET_SPAN_SIMPLE;
    int32_t runStart=0;
    while(runStart<s.length()) {
        const int32_t runLimit=set.span(s, runStart, mode);
        if(mode!=USET_SPAN_NOT_CONTAINED) {
            // A run selected by the filter: delegate to the wrapped normalizer.
            const UNormalizationCheckResult runResult=
                norm2.quickCheck(s.tempSubStringBetween(runStart, runLimit), errorCode);
            if(U_FAILURE(errorCode) || runResult==UNORM_NO) {
                return runResult;
            }
            if(runResult==UNORM_MAYBE) {
                // Remember the weaker answer, but keep looking for a definite NO.
                overall=UNORM_MAYBE;
            }
            mode=USET_SPAN_NOT_CONTAINED;
        } else {
            // A run outside the filter: nothing to check, just flip back.
            mode=USET_SPAN_SIMPLE;
        }
        runStart=runLimit;
    }
    return overall;
}
// Walks s run by run, alternating between runs spanned with USET_SPAN_SIMPLE
// (passed to norm2.spanQuickCheckYes()) and runs spanned with
// USET_SPAN_NOT_CONTAINED (skipped).
// Returns 0 if errorCode indicates a failure after uprv_checkCanGetBuffer().
// Returns the absolute index in s at which the wrapped normalizer's span stopped
// when it ends before the end of its run, or when errorCode becomes a failure.
// Returns s.length() if every delegated run was spanned completely.
int32_t
FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if(U_FAILURE(errorCode)) {
        return 0;
    }
    // The first run is always spanned with USET_SPAN_SIMPLE (it may be empty).
    USetSpanCondition mode=USET_SPAN_SIMPLE;
    int32_t runStart=0;
    while(runStart<s.length()) {
        const int32_t runLimit=set.span(s, runStart, mode);
        if(mode!=USET_SPAN_NOT_CONTAINED) {
            // The wrapped normalizer reports a length relative to the substring;
            // convert it to an index into s.
            const int32_t yesLength=
                norm2.spanQuickCheckYes(s.tempSubStringBetween(runStart, runLimit), errorCode);
            const int32_t yesLimit=runStart+yesLength;
            if(U_FAILURE(errorCode) || yesLimit<runLimit) {
                return yesLimit;
            }
            mode=USET_SPAN_NOT_CONTAINED;
        } else {
            // A run outside the filter is skipped as a whole.
            mode=USET_SPAN_SIMPLE;
        }
        runStart=runLimit;
    }
    return s.length();
}
Ejemplo n.º 3
0
// Checks s run by run, alternating between runs spanned with USET_SPAN_SIMPLE
// (passed to norm2.isNormalized()) and runs spanned with USET_SPAN_NOT_CONTAINED
// (skipped).
// Returns FALSE if errorCode indicates a failure after uprv_checkCanGetBuffer(),
// if the wrapped normalizer rejects any delegated run, or if errorCode becomes a
// failure while checking a run. Returns TRUE otherwise.
UBool
FilteredNormalizer2::isNormalized(const UnicodeString & s, UErrorCode & errorCode) const
{
	uprv_checkCanGetBuffer(s, errorCode);
	if (U_FAILURE(errorCode))
	{
		return FALSE;
	}
	// The first run is always spanned with USET_SPAN_SIMPLE (it may be empty).
	USetSpanCondition mode = USET_SPAN_SIMPLE;
	int32_t runStart = 0;
	while (runStart < s.length())
	{
		const int32_t runLimit = set.span(s, runStart, mode);
		if (mode != USET_SPAN_NOT_CONTAINED)
		{
			// Delegate this run first, then look at both the answer and the error code.
			const UBool runIsNormalized =
			    norm2.isNormalized(s.tempSubStringBetween(runStart, runLimit), errorCode);
			if (U_FAILURE(errorCode) || !runIsNormalized)
			{
				return FALSE;
			}
			mode = USET_SPAN_NOT_CONTAINED;
		}
		else
		{
			// A run outside the filter is skipped as a whole.
			mode = USET_SPAN_SIMPLE;
		}
		runStart = runLimit;
	}
	return TRUE;
}
Ejemplo n.º 4
0
void CollationRegressionTest::Test4179216() {
    // you can position a CollationElementIterator in the middle of
    // a contracting character sequence, yielding a bogus collation
    // element
    IcuTestErrorCode errorCode(*this, "Test4179216");
    RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode);
    UnicodeString testText = "church church catcatcher runcrunchynchy";
    CollationElementIterator *iter = coll.createCollationElementIterator(testText);

    // test that the "ch" combination works properly
    iter->setOffset(4, errorCode);
    int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    iter->reset();
    int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    iter->setOffset(5, errorCode);
    int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    // Compares and prints only 16-bit primary weights.
    if (elt4 != elt0 || elt5 != elt0) {
        errln("The collation elements at positions 0 (0x%04x), "
                "4 (0x%04x), and 5 (0x%04x) don't match.",
                elt0, elt4, elt5);
    }

    // test that the "cat" combination works properly
    iter->setOffset(14, errorCode);
    int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    iter->setOffset(15, errorCode);
    int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    iter->setOffset(16, errorCode);
    int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    iter->setOffset(17, errorCode);
    int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    iter->setOffset(18, errorCode);
    int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    iter->setOffset(19, errorCode);
    int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    // Compares and prints only 16-bit primary weights.
    if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
            || elt14 != elt18 || elt14 != elt19) {
        errln("\"cat\" elements don't match: elt14 = 0x%04x, "
                "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, "
                "elt18 = 0x%04x, elt19 = 0x%04x",
                elt14, elt15, elt16, elt17, elt18, elt19);
    }

    // now generate a complete list of the collation elements,
    // first using next() and then using setOffset(), and
    // make sure both interfaces return the same set of elements
    iter->reset();

    int32_t elt = iter->next(errorCode);
    int32_t count = 0;
    while (elt != CollationElementIterator::NULLORDER) {
        ++count;
        elt = iter->next(errorCode);
    }

    LocalArray<UnicodeString> nextElements(new UnicodeString[count]);
    LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]);
    int32_t lastPos = 0;

    iter->reset();
    elt = iter->next(errorCode);
    count = 0;
    while (elt != CollationElementIterator::NULLORDER) {
        nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
        lastPos = iter->getOffset();
        elt = iter->next(errorCode);
    }
    int32_t nextElementsLength = count;
    count = 0;
    for (int32_t i = 0; i < testText.length(); ) {
        iter->setOffset(i, errorCode);
        lastPos = iter->getOffset();
        elt = iter->next(errorCode);
        setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
        i = iter->getOffset();
    }
    for (int32_t i = 0; i < nextElementsLength; i++) {
        if (nextElements[i] == setOffsetElements[i]) {
            logln(nextElements[i]);
        } else {
            errln(UnicodeString("Error: next() yielded ") + nextElements[i] +
                ", but setOffset() yielded " + setOffsetElements[i]);
        }
    }
    delete iter;
}