Example #1
0
// @bug 4054734
//
// Collator::IDENTICAL documented but not implemented
//
void CollationRegressionTest::Test4054734(/* char* par */)
{
    /*
        Here's the original Java:

        String[] decomp = {
            "\u0001",   "<",    "\u0002",
            "\u0001",   "=",    "\u0001",
            "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
            "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
        };

        String[] nodecomp = {
            "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
        };
    */

    static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0x0001, 0},      {0x3c, 0}, {0x0002, 0},
        {0x0001, 0},      {0x3d, 0}, {0x0001, 0},
        {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
        {0x00c0, 0},      {0x3d, 0}, {0x41, 0x0300, 0}
    };


    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();

    c->setStrength(Collator::IDENTICAL);

    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    compareArray(*c, decomp, UPRV_LENGTHOF(decomp));

    delete c;
}
Example #2
0
void SSearchTest::offsetTest()
{
    const char *test[] = {
        // The sequence \u0FB3\u0F71\u0F71\u0F80 contains a discontiguous
        // contraction (\u0FB3\u0F71\u0F80) logically followed by \u0F71.
        "\\u1E33\\u0FB3\\u0F71\\u0F71\\u0F80\\uD835\\uDF6C\\u01B0",

        "\\ua191\\u16ef\\u2036\\u017a",

#if 0
        // This results in a complex interaction between contraction,
        // expansion and normalization that confuses the backwards offset fixups.
        "\\u0F7F\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85",
#endif

        "\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85",
        "\\u07E9\\u07EA\\u07F1\\u07F2\\u07F3",

        "\\u02FE\\u02FF"
        "\\u0300\\u0301\\u0302\\u0303\\u0304\\u0305\\u0306\\u0307\\u0308\\u0309\\u030A\\u030B\\u030C\\u030D\\u030E\\u030F"
        "\\u0310\\u0311\\u0312\\u0313\\u0314\\u0315\\u0316\\u0317\\u0318\\u0319\\u031A\\u031B\\u031C\\u031D\\u031E\\u031F"
        "\\u0320\\u0321\\u0322\\u0323\\u0324\\u0325\\u0326\\u0327\\u0328\\u0329\\u032A\\u032B\\u032C\\u032D\\u032E\\u032F"
        "\\u0330\\u0331\\u0332\\u0333\\u0334\\u0335\\u0336\\u0337\\u0338\\u0339\\u033A\\u033B\\u033C\\u033D\\u033E\\u033F"
        "\\u0340\\u0341\\u0342\\u0343\\u0344\\u0345\\u0346\\u0347\\u0348\\u0349\\u034A\\u034B\\u034C\\u034D\\u034E", // currently not working, see #8081

        "\\u02FE\\u02FF\\u0300\\u0301\\u0302\\u0303\\u0316\\u0317\\u0318", // currently not working, see #8081
        "a\\u02FF\\u0301\\u0316", // currently not working, see #8081
        "a\\u02FF\\u0316\\u0301",
        "a\\u0430\\u0301\\u0316",
        "a\\u0430\\u0316\\u0301",
        "abc\\u0E41\\u0301\\u0316",
        "abc\\u0E41\\u0316\\u0301",
        "\\u0E41\\u0301\\u0316",
        "\\u0E41\\u0316\\u0301",
        "a\\u0301\\u0316",
        "a\\u0316\\u0301",
        "\\uAC52\\uAC53",
        "\\u34CA\\u34CB",
        "\\u11ED\\u11EE",
        "\\u30C3\\u30D0",
        "p\\u00E9ch\\u00E9",
        "a\\u0301\\u0325",
        "a\\u0300\\u0325",
        "a\\u0325\\u0300",
        "A\\u0323\\u0300B",
        "A\\u0300\\u0323B",
        "A\\u0301\\u0323B",
        "A\\u0302\\u0301\\u0323B",
        "abc",
        "ab\\u0300c",
        "ab\\u0300\\u0323c",
        " \\uD800\\uDC00\\uDC00",
        "a\\uD800\\uDC00\\uDC00",
        "A\\u0301\\u0301",
        "A\\u0301\\u0323",
        "A\\u0301\\u0323B",
        "B\\u0301\\u0323C",
        "A\\u0300\\u0323B",
        "\\u0301A\\u0301\\u0301",
        "abcd\\r\\u0301",
        "p\\u00EAche",
        "pe\\u0302che",
    };

    int32_t testCount = ARRAY_SIZE(test);
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *col = (RuleBasedCollator *) Collator::createInstance(Locale::getEnglish(), status);
    if (U_FAILURE(status)) {
        errcheckln(status, "Failed to create collator in offsetTest! - %s", u_errorName(status));
        return;
    }
    char buffer[4096];  // A bit of a hack... just happens to be long enough for all the test cases...
                        // We could allocate one that's the right size by (CE_count * 10) + 2
                        // 10 chars is enough room for 8 hex digits plus ", ". 2 extra chars for "[" and "]"

    col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);

    for(int32_t i = 0; i < testCount; i += 1) {
      if (i>=4 && i<=6 && logKnownIssue("9156", "was 8081")) {
            continue; // timebomb until ticket #9156 (was #8081) is resolved
        }
        UnicodeString ts = CharsToUnicodeString(test[i]);
        CollationElementIterator *iter = col->createCollationElementIterator(ts);
        OrderList forwardList;
        OrderList backwardList;
        int32_t order, low, high;

        do {
            low   = iter->getOffset();
            order = iter->next(status);
            high  = iter->getOffset();

            forwardList.add(order, low, high);
        } while (order != CollationElementIterator::NULLORDER);

        iter->reset();
        iter->setOffset(ts.length(), status);

        backwardList.add(CollationElementIterator::NULLORDER, iter->getOffset(), iter->getOffset());

        do {
            high  = iter->getOffset();
            order = iter->previous(status);
            low   = iter->getOffset();

            if (order == CollationElementIterator::NULLORDER) {
                break;
            }

            backwardList.add(order, low, high);
        } while (TRUE);

        backwardList.reverse();

        if (forwardList.compare(backwardList)) {
            logln("Works with \"%s\"", test[i]);
            logln("Forward offsets:  [%s]", printOffsets(buffer, forwardList));
//          logln("Backward offsets: [%s]", printOffsets(buffer, backwardList));

            logln("Forward CEs:  [%s]", printOrders(buffer, forwardList));
//          logln("Backward CEs: [%s]", printOrders(buffer, backwardList));

            logln();
        } else {
            errln("Fails with \"%s\"", test[i]);
            infoln("Forward offsets:  [%s]", printOffsets(buffer, forwardList));
            infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList));

            infoln("Forward CEs:  [%s]", printOrders(buffer, forwardList));
            infoln("Backward CEs: [%s]", printOrders(buffer, backwardList));

            infoln();
        }
        delete iter;
    }
    delete col;
}
Example #3
0
// @bug 4060154
//
// MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
//
void CollationRegressionTest::Test4060154(/* char* par */)
{
    UErrorCode status = U_ZERO_ERROR;
    UnicodeString rules;

    rules += "&f < g, G < h, H < i, I < j, J";
    rules +=  " & H < ";
    rules += (UChar)0x0131;
    rules += ", ";
    rules += (UChar)0x0130;
    rules += ", i, I";

    RuleBasedCollator *c = NULL;

    c = new RuleBasedCollator(rules, status);

    if (c == NULL || U_FAILURE(status))
    {
        errln("failure building collator.");
        delete c;
        return;
    }

    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);

 /*
    String[] tertiary = {
        "A",        "<",    "B",
        "H",        "<",    "\u0131",
        "H",        "<",    "I",
        "\u0131",   "<",    "\u0130",
        "\u0130",   "<",    "i",
        "\u0130",   ">",    "H",
    };
*/

    static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0x41, 0},    {0x3c, 0}, {0x42, 0},
        {0x48, 0},    {0x3c, 0}, {0x0131, 0},
        {0x48, 0},    {0x3c, 0}, {0x49, 0},
        {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
        {0x0130, 0}, {0x3c, 0}, {0x69, 0},
        {0x0130, 0}, {0x3e, 0}, {0x48, 0}
    };

    c->setStrength(Collator::TERTIARY);
    compareArray(*c, tertiary, ARRAY_LENGTH(tertiary));

    /*
    String[] secondary = {
        "H",        "<",    "I",
        "\u0131",   "=",    "\u0130",
    };
*/
    static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0x48, 0},    {0x3c, 0}, {0x49, 0},
        {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
    };

    c->setStrength(Collator::PRIMARY);
    compareArray(*c, secondary, ARRAY_LENGTH(secondary));

    delete c;
}
Example #4
0
void CollationIteratorTest::TestAssignment()
{
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *coll = 
        (RuleBasedCollator *)Collator::createInstance(status);

    if (coll == NULL || U_FAILURE(status))
    {
        errln("Couldn't create a default collator.");
        return;
    }

    UnicodeString source("abcd");
    CollationElementIterator *iter1 = 
        coll->createCollationElementIterator(source);

    CollationElementIterator iter2 = *iter1;

    if (*iter1 != iter2) {
        errln("Fail collation iterator assignment does not produce the same elements");
    }

    CollationElementIterator iter3(*iter1);

    if (*iter1 != iter3) {
        errln("Fail collation iterator copy constructor does not produce the same elements");
    }

    source = CharsToUnicodeString("a\\u0300\\u0325");
    coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    CollationElementIterator *iter4 
                        = coll->createCollationElementIterator(source);
    CollationElementIterator iter5(*iter4);
    if (*iter4 != iter5) {
        errln("collation iterator assignment does not produce the same elements");
    }
    iter4->next(status);
    if (U_FAILURE(status) || *iter4 == iter5) {
        errln("collation iterator not equal");
    }
    iter5.next(status);
    if (U_FAILURE(status) || *iter4 != iter5) {
        errln("collation iterator equal");
    }
    iter4->next(status);
    if (U_FAILURE(status) || *iter4 == iter5) {
        errln("collation iterator not equal");
    }
    iter5.next(status);
    if (U_FAILURE(status) || *iter4 != iter5) {
        errln("collation iterator equal");
    }
    CollationElementIterator iter6(*iter4);
    if (*iter4 != iter6) {
        errln("collation iterator equal");
    }
    iter4->next(status);
    if (U_FAILURE(status) || *iter4 == iter5) {
        errln("collation iterator not equal");
    }
    iter5.next(status);
    if (U_FAILURE(status) || *iter4 != iter5) {
        errln("collation iterator equal");
    }
    iter4->next(status);
    if (U_FAILURE(status) || *iter4 == iter5) {
        errln("collation iterator not equal");
    }
    iter5.next(status);
    if (U_FAILURE(status) || *iter4 != iter5) {
        errln("collation iterator equal");
    }
    delete iter1;
    delete iter4;
    delete coll;
}