예제 #1
0
void CollationIteratorTest::TestOffset(/* char* par */)
{
    CollationElementIterator *iter = en_us->createCollationElementIterator(test1);
    UErrorCode status = U_ZERO_ERROR;
    // testing boundaries
    iter->setOffset(0, status);
    if (U_FAILURE(status) || iter->previous(status) != UCOL_NULLORDER) {
        errln("Error: After setting offset to 0, we should be at the end "
                "of the backwards iteration");
    }
    iter->setOffset(test1.length(), status);
    if (U_FAILURE(status) || iter->next(status) != UCOL_NULLORDER) {
        errln("Error: After setting offset to end of the string, we should "
                "be at the end of the backwards iteration");
    }

    // Run all the way through the iterator, then get the offset
    int32_t orderLength = 0;
    Order *orders = getOrders(*iter, orderLength);

    int32_t offset = iter->getOffset();

    if (offset != test1.length())
    {
        UnicodeString msg1("offset at end != length: ");
        UnicodeString msg2(" vs ");

        errln(msg1 + offset + msg2 + test1.length());
    }

    // Now set the offset back to the beginning and see if it works
    CollationElementIterator *pristine = en_us->createCollationElementIterator(test1);

    iter->setOffset(0, status);

    if (U_FAILURE(status))
    {
        errln("setOffset failed.");
    }
    else
    {
        assertEqual(*iter, *pristine);
    }

    // TODO: try iterating halfway through a messy string.

    delete pristine;
    delete[] orders;
    delete iter;
}
예제 #2
0
파일: ssearch.cpp 프로젝트: Distrotech/icu
void SSearchTest::offsetTest()
{
    const char *test[] = {
        // The sequence \u0FB3\u0F71\u0F71\u0F80 contains a discontiguous
        // contraction (\u0FB3\u0F71\u0F80) logically followed by \u0F71.
        "\\u1E33\\u0FB3\\u0F71\\u0F71\\u0F80\\uD835\\uDF6C\\u01B0",

        "\\ua191\\u16ef\\u2036\\u017a",

#if 0
        // This results in a complex interaction between contraction,
        // expansion and normalization that confuses the backwards offset fixups.
        "\\u0F7F\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85",
#endif

        "\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85",
        "\\u07E9\\u07EA\\u07F1\\u07F2\\u07F3",

        "\\u02FE\\u02FF"
        "\\u0300\\u0301\\u0302\\u0303\\u0304\\u0305\\u0306\\u0307\\u0308\\u0309\\u030A\\u030B\\u030C\\u030D\\u030E\\u030F"
        "\\u0310\\u0311\\u0312\\u0313\\u0314\\u0315\\u0316\\u0317\\u0318\\u0319\\u031A\\u031B\\u031C\\u031D\\u031E\\u031F"
        "\\u0320\\u0321\\u0322\\u0323\\u0324\\u0325\\u0326\\u0327\\u0328\\u0329\\u032A\\u032B\\u032C\\u032D\\u032E\\u032F"
        "\\u0330\\u0331\\u0332\\u0333\\u0334\\u0335\\u0336\\u0337\\u0338\\u0339\\u033A\\u033B\\u033C\\u033D\\u033E\\u033F"
        "\\u0340\\u0341\\u0342\\u0343\\u0344\\u0345\\u0346\\u0347\\u0348\\u0349\\u034A\\u034B\\u034C\\u034D\\u034E", // currently not working, see #8081

        "\\u02FE\\u02FF\\u0300\\u0301\\u0302\\u0303\\u0316\\u0317\\u0318", // currently not working, see #8081
        "a\\u02FF\\u0301\\u0316", // currently not working, see #8081
        "a\\u02FF\\u0316\\u0301",
        "a\\u0430\\u0301\\u0316",
        "a\\u0430\\u0316\\u0301",
        "abc\\u0E41\\u0301\\u0316",
        "abc\\u0E41\\u0316\\u0301",
        "\\u0E41\\u0301\\u0316",
        "\\u0E41\\u0316\\u0301",
        "a\\u0301\\u0316",
        "a\\u0316\\u0301",
        "\\uAC52\\uAC53",
        "\\u34CA\\u34CB",
        "\\u11ED\\u11EE",
        "\\u30C3\\u30D0",
        "p\\u00E9ch\\u00E9",
        "a\\u0301\\u0325",
        "a\\u0300\\u0325",
        "a\\u0325\\u0300",
        "A\\u0323\\u0300B",
        "A\\u0300\\u0323B",
        "A\\u0301\\u0323B",
        "A\\u0302\\u0301\\u0323B",
        "abc",
        "ab\\u0300c",
        "ab\\u0300\\u0323c",
        " \\uD800\\uDC00\\uDC00",
        "a\\uD800\\uDC00\\uDC00",
        "A\\u0301\\u0301",
        "A\\u0301\\u0323",
        "A\\u0301\\u0323B",
        "B\\u0301\\u0323C",
        "A\\u0300\\u0323B",
        "\\u0301A\\u0301\\u0301",
        "abcd\\r\\u0301",
        "p\\u00EAche",
        "pe\\u0302che",
    };

    int32_t testCount = ARRAY_SIZE(test);
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *col = (RuleBasedCollator *) Collator::createInstance(Locale::getEnglish(), status);
    if (U_FAILURE(status)) {
        errcheckln(status, "Failed to create collator in offsetTest! - %s", u_errorName(status));
        return;
    }
    char buffer[4096];  // A bit of a hack... just happens to be long enough for all the test cases...
                        // We could allocate one that's the right size by (CE_count * 10) + 2
                        // 10 chars is enough room for 8 hex digits plus ", ". 2 extra chars for "[" and "]"

    col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);

    for(int32_t i = 0; i < testCount; i += 1) {
      if (i>=4 && i<=6 && logKnownIssue("9156", "was 8081")) {
            continue; // timebomb until ticket #9156 (was #8081) is resolved
        }
        UnicodeString ts = CharsToUnicodeString(test[i]);
        CollationElementIterator *iter = col->createCollationElementIterator(ts);
        OrderList forwardList;
        OrderList backwardList;
        int32_t order, low, high;

        do {
            low   = iter->getOffset();
            order = iter->next(status);
            high  = iter->getOffset();

            forwardList.add(order, low, high);
        } while (order != CollationElementIterator::NULLORDER);

        iter->reset();
        iter->setOffset(ts.length(), status);

        backwardList.add(CollationElementIterator::NULLORDER, iter->getOffset(), iter->getOffset());

        do {
            high  = iter->getOffset();
            order = iter->previous(status);
            low   = iter->getOffset();

            if (order == CollationElementIterator::NULLORDER) {
                break;
            }

            backwardList.add(order, low, high);
        } while (TRUE);

        backwardList.reverse();

        if (forwardList.compare(backwardList)) {
            logln("Works with \"%s\"", test[i]);
            logln("Forward offsets:  [%s]", printOffsets(buffer, forwardList));
//          logln("Backward offsets: [%s]", printOffsets(buffer, backwardList));

            logln("Forward CEs:  [%s]", printOrders(buffer, forwardList));
//          logln("Backward CEs: [%s]", printOrders(buffer, backwardList));

            logln();
        } else {
            errln("Fails with \"%s\"", test[i]);
            infoln("Forward offsets:  [%s]", printOffsets(buffer, forwardList));
            infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList));

            infoln("Forward CEs:  [%s]", printOrders(buffer, forwardList));
            infoln("Backward CEs: [%s]", printOrders(buffer, backwardList));

            infoln();
        }
        delete iter;
    }
    delete col;
}
예제 #3
0
void CollationRegressionTest::Test4179216() {
    // you can position a CollationElementIterator in the middle of
    // a contracting character sequence, yielding a bogus collation
    // element
    IcuTestErrorCode errorCode(*this, "Test4179216");
    RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode);
    UnicodeString testText = "church church catcatcher runcrunchynchy";
    CollationElementIterator *iter = coll.createCollationElementIterator(testText);

    // test that the "ch" combination works properly
    iter->setOffset(4, errorCode);
    int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    iter->reset();
    int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    iter->setOffset(5, errorCode);
    int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    // Compares and prints only 16-bit primary weights.
    if (elt4 != elt0 || elt5 != elt0) {
        errln("The collation elements at positions 0 (0x%04x), "
                "4 (0x%04x), and 5 (0x%04x) don't match.",
                elt0, elt4, elt5);
    }

    // test that the "cat" combination works properly
    iter->setOffset(14, errorCode);
    int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    iter->setOffset(15, errorCode);
    int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    iter->setOffset(16, errorCode);
    int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    iter->setOffset(17, errorCode);
    int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    iter->setOffset(18, errorCode);
    int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    iter->setOffset(19, errorCode);
    int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode));

    // Compares and prints only 16-bit primary weights.
    if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
            || elt14 != elt18 || elt14 != elt19) {
        errln("\"cat\" elements don't match: elt14 = 0x%04x, "
                "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, "
                "elt18 = 0x%04x, elt19 = 0x%04x",
                elt14, elt15, elt16, elt17, elt18, elt19);
    }

    // now generate a complete list of the collation elements,
    // first using next() and then using setOffset(), and
    // make sure both interfaces return the same set of elements
    iter->reset();

    int32_t elt = iter->next(errorCode);
    int32_t count = 0;
    while (elt != CollationElementIterator::NULLORDER) {
        ++count;
        elt = iter->next(errorCode);
    }

    LocalArray<UnicodeString> nextElements(new UnicodeString[count]);
    LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]);
    int32_t lastPos = 0;

    iter->reset();
    elt = iter->next(errorCode);
    count = 0;
    while (elt != CollationElementIterator::NULLORDER) {
        nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
        lastPos = iter->getOffset();
        elt = iter->next(errorCode);
    }
    int32_t nextElementsLength = count;
    count = 0;
    for (int32_t i = 0; i < testText.length(); ) {
        iter->setOffset(i, errorCode);
        lastPos = iter->getOffset();
        elt = iter->next(errorCode);
        setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
        i = iter->getOffset();
    }
    for (int32_t i = 0; i < nextElementsLength; i++) {
        if (nextElements[i] == setOffsetElements[i]) {
            logln(nextElements[i]);
        } else {
            errln(UnicodeString("Error: next() yielded ") + nextElements[i] +
                ", but setOffset() yielded " + setOffsetElements[i]);
        }
    }
    delete iter;
}
예제 #4
0
void CollationIteratorTest::TestClearBuffers(/* char* par */)
{
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *c = new RuleBasedCollator((UnicodeString)"&a < b < c & ab = d", status);

    if (c == NULL || U_FAILURE(status))
    {
        errln("Couldn't create a RuleBasedCollator.");
        delete c;
        return;
    }

    UnicodeString source("abcd");
    CollationElementIterator *i = c->createCollationElementIterator(source);
    int32_t e0 = i->next(status);    // save the first collation element

    if (U_FAILURE(status))
    {
        errln("call to i->next() failed. err=%s", u_errorName(status));
    }
    else
    {
        i->setOffset(3, status);        // go to the expanding character

        if (U_FAILURE(status))
        {
            errln("call to i->setOffset(3) failed. err=%s", u_errorName(status));
        }
        else
        {
            i->next(status);                // but only use up half of it

            if (U_FAILURE(status))
            {
                errln("call to i->next() failed. err=%s", u_errorName(status));
            }
            else
            {
                i->setOffset(0, status);        // go back to the beginning

                if (U_FAILURE(status))
                {
                    errln("call to i->setOffset(0) failed. err=%s", u_errorName(status));
                }
                else
                {
                    int32_t e = i->next(status);    // and get this one again

                    if (U_FAILURE(status))
                    {
                        errln("call to i->next() failed. err=%s", u_errorName(status));
                    }
                    else if (e != e0)
                    {
                        errln("got 0x%X, expected 0x%X", e, e0);
                    }
                }
            }
        }
    }

    delete i;
    delete c;
}