Example #1
0
void CollationIteratorTest::TestConstructors()
{
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *coll = 
        (RuleBasedCollator *)Collator::createInstance(status);
    if (coll == NULL || U_FAILURE(status))
    {
        errln("Couldn't create a default collator.");
        return;
    }

    // testing protected constructor with character iterator as argument
    StringCharacterIterator chariter(test1);
    CollationElementIterator *iter1 = 
        coll->createCollationElementIterator(chariter);
    if (U_FAILURE(status)) {
        errln("Couldn't create collation element iterator with character iterator.");
        return;
    }
    CollationElementIterator *iter2 = 
        coll->createCollationElementIterator(test1);

    // initially the 2 collation element iterators should be the same
    if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2 
        || *iter2 != *iter1) {
        errln("CollationElementIterators constructed with the same string data should be the same at the start");
    }
    assertEqual(*iter1, *iter2);

    delete iter1;
    delete iter2;

    // tests empty strings
    UnicodeString empty("");
    iter1 = coll->createCollationElementIterator(empty);
    chariter.setText(empty);
    iter2 = coll->createCollationElementIterator(chariter);
    if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2 
        || *iter2 != *iter1) {
        errln("CollationElementIterators constructed with the same string data should be the same at the start");
    } 
    if (iter1->next(status) != (int32_t)UCOL_NULLORDER) {
        errln("Empty string should have no CEs.");
    }
    if (iter2->next(status) != (int32_t)UCOL_NULLORDER) {
        errln("Empty string should have no CEs.");
    }
    delete iter1;
    delete iter2;
    delete coll;
}
Example #2
0
// @bug 4101940
//
void CollationRegressionTest::Test4101940(/* char* par */)
{
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *c = NULL;
    UnicodeString rules = "&9 < a < b";
    UnicodeString nothing = "";

    c = new RuleBasedCollator(rules, status);

    if (c == NULL || U_FAILURE(status))
    {
        errln("Failed to create RuleBasedCollator");
        delete c;
        return;
    }

    CollationElementIterator *i = c->createCollationElementIterator(nothing);
    i->reset();

    if (i->next(status) != CollationElementIterator::NULLORDER)
    {
        errln("next did not return NULLORDER");
    }

    delete i;
    delete c;
}
Example #3
0
// @bug 4054238
//
// CollationElementIterator will not work correctly if the associated
// Collator object's mode is changed
//
void CollationRegressionTest::Test4054238(/* char* par */)
{
    const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
    const UnicodeString test3(chars3);
    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();

    // NOTE: The Java code uses en_us to create the CollationElementIterators
    // but I'm pretty sure that's wrong, so I've changed this to use c.
    UErrorCode status = U_ZERO_ERROR;
    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    CollationElementIterator *i1 = c->createCollationElementIterator(test3);
    delete i1;
    delete c;
}
Example #4
0
void CollationIteratorTest::TestStrengthOrder()
{
    int order = 0x0123ABCD;

    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *coll = 
        (RuleBasedCollator *)Collator::createInstance(status);
    if (coll == NULL || U_FAILURE(status))
    {
        errln("Couldn't create a default collator.");
        return;
    }

    coll->setStrength(Collator::PRIMARY);
    CollationElementIterator *iter = 
        coll->createCollationElementIterator(test1);

    if (iter == NULL) {
        errln("Couldn't create a collation element iterator from default collator");
        return;
    }

    if (iter->strengthOrder(order) != 0x01230000) {
        errln("Strength order for a primary strength collator should be the first 2 bytes");
        return;
    }

    coll->setStrength(Collator::SECONDARY);
    if (iter->strengthOrder(order) != 0x0123AB00) {
        errln("Strength order for a secondary strength collator should be the third byte");
        return;
    }

    coll->setStrength(Collator::TERTIARY);
    if (iter->strengthOrder(order) != order) {
        errln("Strength order for a tertiary strength collator should be the third byte");
        return;
    }
    delete iter;
    delete coll;
}
Example #5
0
void CollationIteratorTest::TestMaxExpansion(/* char* par */)
{
    UErrorCode          status = U_ZERO_ERROR; 
    UnicodeString rule("&a < ab < c/aba < d < z < ch");
    RuleBasedCollator  *coll   = new RuleBasedCollator(rule, status);
    UChar               ch     = 0;
    UnicodeString       str(ch);

    CollationElementIterator *iter   = coll->createCollationElementIterator(str);

    while (ch < 0xFFFF && U_SUCCESS(status)) {
        int      count = 1;
        uint32_t order;
        ch ++;
        UnicodeString str(ch);
        iter->setText(str, status);
        order = iter->previous(status);

        /* thai management */
        if (CollationElementIterator::isIgnorable(order))
            order = iter->previous(status);

        while (U_SUCCESS(status)
            && iter->previous(status) != (int32_t)UCOL_NULLORDER)
        {
            count ++; 
        }

        if (U_FAILURE(status) && iter->getMaxExpansion(order) < count) {
            errln("Failure at codepoint %d, maximum expansion count < %d\n",
                ch, count);
        }
    }

    delete iter;
    delete coll;
}
Example #6
0
void SSearchTest::offsetTest()
{
    const char *test[] = {
        // The sequence \u0FB3\u0F71\u0F71\u0F80 contains a discontiguous
        // contraction (\u0FB3\u0F71\u0F80) logically followed by \u0F71.
        "\\u1E33\\u0FB3\\u0F71\\u0F71\\u0F80\\uD835\\uDF6C\\u01B0",

        "\\ua191\\u16ef\\u2036\\u017a",

#if 0
        // This results in a complex interaction between contraction,
        // expansion and normalization that confuses the backwards offset fixups.
        "\\u0F7F\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85",
#endif

        "\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85",
        "\\u07E9\\u07EA\\u07F1\\u07F2\\u07F3",

        "\\u02FE\\u02FF"
        "\\u0300\\u0301\\u0302\\u0303\\u0304\\u0305\\u0306\\u0307\\u0308\\u0309\\u030A\\u030B\\u030C\\u030D\\u030E\\u030F"
        "\\u0310\\u0311\\u0312\\u0313\\u0314\\u0315\\u0316\\u0317\\u0318\\u0319\\u031A\\u031B\\u031C\\u031D\\u031E\\u031F"
        "\\u0320\\u0321\\u0322\\u0323\\u0324\\u0325\\u0326\\u0327\\u0328\\u0329\\u032A\\u032B\\u032C\\u032D\\u032E\\u032F"
        "\\u0330\\u0331\\u0332\\u0333\\u0334\\u0335\\u0336\\u0337\\u0338\\u0339\\u033A\\u033B\\u033C\\u033D\\u033E\\u033F"
        "\\u0340\\u0341\\u0342\\u0343\\u0344\\u0345\\u0346\\u0347\\u0348\\u0349\\u034A\\u034B\\u034C\\u034D\\u034E", // currently not working, see #8081

        "\\u02FE\\u02FF\\u0300\\u0301\\u0302\\u0303\\u0316\\u0317\\u0318", // currently not working, see #8081
        "a\\u02FF\\u0301\\u0316", // currently not working, see #8081
        "a\\u02FF\\u0316\\u0301",
        "a\\u0430\\u0301\\u0316",
        "a\\u0430\\u0316\\u0301",
        "abc\\u0E41\\u0301\\u0316",
        "abc\\u0E41\\u0316\\u0301",
        "\\u0E41\\u0301\\u0316",
        "\\u0E41\\u0316\\u0301",
        "a\\u0301\\u0316",
        "a\\u0316\\u0301",
        "\\uAC52\\uAC53",
        "\\u34CA\\u34CB",
        "\\u11ED\\u11EE",
        "\\u30C3\\u30D0",
        "p\\u00E9ch\\u00E9",
        "a\\u0301\\u0325",
        "a\\u0300\\u0325",
        "a\\u0325\\u0300",
        "A\\u0323\\u0300B",
        "A\\u0300\\u0323B",
        "A\\u0301\\u0323B",
        "A\\u0302\\u0301\\u0323B",
        "abc",
        "ab\\u0300c",
        "ab\\u0300\\u0323c",
        " \\uD800\\uDC00\\uDC00",
        "a\\uD800\\uDC00\\uDC00",
        "A\\u0301\\u0301",
        "A\\u0301\\u0323",
        "A\\u0301\\u0323B",
        "B\\u0301\\u0323C",
        "A\\u0300\\u0323B",
        "\\u0301A\\u0301\\u0301",
        "abcd\\r\\u0301",
        "p\\u00EAche",
        "pe\\u0302che",
    };

    int32_t testCount = ARRAY_SIZE(test);
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *col = (RuleBasedCollator *) Collator::createInstance(Locale::getEnglish(), status);
    if (U_FAILURE(status)) {
        errcheckln(status, "Failed to create collator in offsetTest! - %s", u_errorName(status));
        return;
    }
    char buffer[4096];  // A bit of a hack... just happens to be long enough for all the test cases...
                        // We could allocate one that's the right size by (CE_count * 10) + 2
                        // 10 chars is enough room for 8 hex digits plus ", ". 2 extra chars for "[" and "]"

    col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);

    for(int32_t i = 0; i < testCount; i += 1) {
      if (i>=4 && i<=6 && logKnownIssue("9156", "was 8081")) {
            continue; // timebomb until ticket #9156 (was #8081) is resolved
        }
        UnicodeString ts = CharsToUnicodeString(test[i]);
        CollationElementIterator *iter = col->createCollationElementIterator(ts);
        OrderList forwardList;
        OrderList backwardList;
        int32_t order, low, high;

        do {
            low   = iter->getOffset();
            order = iter->next(status);
            high  = iter->getOffset();

            forwardList.add(order, low, high);
        } while (order != CollationElementIterator::NULLORDER);

        iter->reset();
        iter->setOffset(ts.length(), status);

        backwardList.add(CollationElementIterator::NULLORDER, iter->getOffset(), iter->getOffset());

        do {
            high  = iter->getOffset();
            order = iter->previous(status);
            low   = iter->getOffset();

            if (order == CollationElementIterator::NULLORDER) {
                break;
            }

            backwardList.add(order, low, high);
        } while (TRUE);

        backwardList.reverse();

        if (forwardList.compare(backwardList)) {
            logln("Works with \"%s\"", test[i]);
            logln("Forward offsets:  [%s]", printOffsets(buffer, forwardList));
//          logln("Backward offsets: [%s]", printOffsets(buffer, backwardList));

            logln("Forward CEs:  [%s]", printOrders(buffer, forwardList));
//          logln("Backward CEs: [%s]", printOrders(buffer, backwardList));

            logln();
        } else {
            errln("Fails with \"%s\"", test[i]);
            infoln("Forward offsets:  [%s]", printOffsets(buffer, forwardList));
            infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList));

            infoln("Forward CEs:  [%s]", printOrders(buffer, forwardList));
            infoln("Backward CEs: [%s]", printOrders(buffer, backwardList));

            infoln();
        }
        delete iter;
    }
    delete col;
}
Example #7
0
void CollationIteratorTest::TestAssignment()
{
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *coll = 
        (RuleBasedCollator *)Collator::createInstance(status);

    if (coll == NULL || U_FAILURE(status))
    {
        errln("Couldn't create a default collator.");
        return;
    }

    UnicodeString source("abcd");
    CollationElementIterator *iter1 = 
        coll->createCollationElementIterator(source);

    CollationElementIterator iter2 = *iter1;

    if (*iter1 != iter2) {
        errln("Fail collation iterator assignment does not produce the same elements");
    }

    CollationElementIterator iter3(*iter1);

    if (*iter1 != iter3) {
        errln("Fail collation iterator copy constructor does not produce the same elements");
    }

    source = CharsToUnicodeString("a\\u0300\\u0325");
    coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    CollationElementIterator *iter4 
                        = coll->createCollationElementIterator(source);
    CollationElementIterator iter5(*iter4);
    if (*iter4 != iter5) {
        errln("collation iterator assignment does not produce the same elements");
    }
    iter4->next(status);
    if (U_FAILURE(status) || *iter4 == iter5) {
        errln("collation iterator not equal");
    }
    iter5.next(status);
    if (U_FAILURE(status) || *iter4 != iter5) {
        errln("collation iterator equal");
    }
    iter4->next(status);
    if (U_FAILURE(status) || *iter4 == iter5) {
        errln("collation iterator not equal");
    }
    iter5.next(status);
    if (U_FAILURE(status) || *iter4 != iter5) {
        errln("collation iterator equal");
    }
    CollationElementIterator iter6(*iter4);
    if (*iter4 != iter6) {
        errln("collation iterator equal");
    }
    iter4->next(status);
    if (U_FAILURE(status) || *iter4 == iter5) {
        errln("collation iterator not equal");
    }
    iter5.next(status);
    if (U_FAILURE(status) || *iter4 != iter5) {
        errln("collation iterator equal");
    }
    iter4->next(status);
    if (U_FAILURE(status) || *iter4 == iter5) {
        errln("collation iterator not equal");
    }
    iter5.next(status);
    if (U_FAILURE(status) || *iter4 != iter5) {
        errln("collation iterator equal");
    }
    delete iter1;
    delete iter4;
    delete coll;
}
Example #8
0
void CollationIteratorTest::TestClearBuffers(/* char* par */)
{
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *c = new RuleBasedCollator((UnicodeString)"&a < b < c & ab = d", status);

    if (c == NULL || U_FAILURE(status))
    {
        errln("Couldn't create a RuleBasedCollator.");
        delete c;
        return;
    }

    UnicodeString source("abcd");
    CollationElementIterator *i = c->createCollationElementIterator(source);
    int32_t e0 = i->next(status);    // save the first collation element

    if (U_FAILURE(status))
    {
        errln("call to i->next() failed. err=%s", u_errorName(status));
    }
    else
    {
        i->setOffset(3, status);        // go to the expanding character

        if (U_FAILURE(status))
        {
            errln("call to i->setOffset(3) failed. err=%s", u_errorName(status));
        }
        else
        {
            i->next(status);                // but only use up half of it

            if (U_FAILURE(status))
            {
                errln("call to i->next() failed. err=%s", u_errorName(status));
            }
            else
            {
                i->setOffset(0, status);        // go back to the beginning

                if (U_FAILURE(status))
                {
                    errln("call to i->setOffset(0) failed. err=%s", u_errorName(status));
                }
                else
                {
                    int32_t e = i->next(status);    // and get this one again

                    if (U_FAILURE(status))
                    {
                        errln("call to i->next() failed. err=%s", u_errorName(status));
                    }
                    else if (e != e0)
                    {
                        errln("got 0x%X, expected 0x%X", e, e0);
                    }
                }
            }
        }
    }

    delete i;
    delete c;
}