Ejemplo n.º 1
0
/**
 * Returns the collator to use for lenient parsing.  The collator is lazily created:
 * this function creates it the first time it's called.
 * @return The collator to use for lenient parsing, or null if lenient parsing
 * is turned off.
*/
Collator*
RuleBasedNumberFormat::getCollator() const
{
#if !UCONFIG_NO_COLLATION
    if (!ruleSets) {
        return NULL;
    }

    // lazy-evaulate the collator
    if (collator == NULL && lenient) {
        // create a default collator based on the formatter's locale,
        // then pull out that collator's rules, append any additional
        // rules specified in the description, and create a _new_
        // collator based on the combinaiton of those rules

        UErrorCode status = U_ZERO_ERROR;

        Collator* temp = Collator::createInstance(locale, status);
        RuleBasedCollator* newCollator;
        if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
            if (lenientParseRules) {
                UnicodeString rules(newCollator->getRules());
                rules.append(*lenientParseRules);

                newCollator = new RuleBasedCollator(rules, status);
                // Exit if newCollator could not be created.
                if (newCollator == NULL) {
                	return NULL;
                }
            } else {
                temp = NULL;
            }
            if (U_SUCCESS(status)) {
                newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
                // cast away const
                ((RuleBasedNumberFormat*)this)->collator = newCollator;
            } else {
                delete newCollator;
            }
        }
        delete temp;
    }
#endif

    // if lenient-parse mode is off, this will be null
    // (see setLenientParseMode())
    return collator;
}
Ejemplo n.º 2
0
// @bug 4054736
//
// Full Decomposition mode not implemented
//
void CollationRegressionTest::Test4054736(/* char* par */)
{
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();

    c->setStrength(Collator::SECONDARY);
    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);

    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC}  // Alef-Lamed vs. Alef, Lamed
    };

    compareArray(*c, tests, ARRAY_LENGTH(tests));

    delete c;
}
Ejemplo n.º 3
0
// @bug 4076676
//
// Bad canonicalization of same-class combining characters
//
void CollationRegressionTest::Test4076676(/* char* par */)
{
    // These combining characters are all in the same class, so they should not
    // be reordered, and they should compare as unequal.
    static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
    static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};

    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    c->setStrength(Collator::TERTIARY);

    if (c->compare(s1,s2) == 0)
    {
        errln("Same-class combining chars were reordered");
    }

    delete c;
}
Ejemplo n.º 4
0
U_CAPI UCollator* U_EXPORT2
ucol_openBinary(const uint8_t *bin, int32_t length,
                const UCollator *base,
                UErrorCode *status)
{
    if(U_FAILURE(*status)) { return NULL; }
    RuleBasedCollator *coll = new RuleBasedCollator(
            bin, length,
            RuleBasedCollator::rbcFromUCollator(base),
            *status);
    if(coll == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    if(U_FAILURE(*status)) {
        delete coll;
        return NULL;
    }
    return coll->toUCollator();
}
Ejemplo n.º 5
0
void CollationIteratorTest::TestStrengthOrder()
{
    int order = 0x0123ABCD;

    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *coll = 
        (RuleBasedCollator *)Collator::createInstance(status);
    if (coll == NULL || U_FAILURE(status))
    {
        errln("Couldn't create a default collator.");
        return;
    }

    coll->setStrength(Collator::PRIMARY);
    CollationElementIterator *iter = 
        coll->createCollationElementIterator(test1);

    if (iter == NULL) {
        errln("Couldn't create a collation element iterator from default collator");
        return;
    }

    if (iter->strengthOrder(order) != 0x01230000) {
        errln("Strength order for a primary strength collator should be the first 2 bytes");
        return;
    }

    coll->setStrength(Collator::SECONDARY);
    if (iter->strengthOrder(order) != 0x0123AB00) {
        errln("Strength order for a secondary strength collator should be the third byte");
        return;
    }

    coll->setStrength(Collator::TERTIARY);
    if (iter->strengthOrder(order) != order) {
        errln("Strength order for a tertiary strength collator should be the third byte");
        return;
    }
    delete iter;
    delete coll;
}
Ejemplo n.º 6
0
// @bug 4078588
//
// RuleBasedCollator breaks on "< a < bb" rule
//
void CollationRegressionTest::Test4078588(/* char *par */)
{
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status);

    if (rbc == NULL || U_FAILURE(status))
    {
        errln("Failed to create RuleBasedCollator.");
        delete rbc;
        return;
    }

    Collator::EComparisonResult result = rbc->compare("a","bb");

    if (result != Collator::LESS)
    {
        errln((UnicodeString)"Compare(a,bb) returned " + (int)result
            + (UnicodeString)"; expected -1");
    }

    delete rbc;
}
Ejemplo n.º 7
0
// @bug 4066696
//
// French secondary collation checking at the end of compare iteration fails
//
void CollationRegressionTest::Test4066696(/* char* par */)
{
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *c = NULL;

    c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status);

    if (c == NULL || U_FAILURE(status))
    {
        errln("Failure creating collator for Locale::getCanadaFrench()");
        delete c;
        return;
    }

    c->setStrength(Collator::SECONDARY);

/*
    String[] tests = {
        "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
    };

  should be:

    String[] tests = {
        "\u00e0",   ">",     "\u01fa",       // a-grave <  A-ring-acute
    };

*/

    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
    };

    compareArray(*c, tests, ARRAY_LENGTH(tests));

    delete c;
}
Ejemplo n.º 8
0
// @bug 4114076
//
// Collation not Unicode conformant with Hangul syllables
//
void CollationRegressionTest::Test4114076(/* char* par */)
{
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    c->setStrength(Collator::TERTIARY);

    //
    // With Canonical decomposition, Hangul syllables should get decomposed
    // into Jamo, but Jamo characters should not be decomposed into
    // conjoining Jamo
    //
    static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
    };

    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    compareArray(*c, test1, ARRAY_LENGTH(test1));

    // From UTR #15:
    // *In earlier versions of Unicode, jamo characters like ksf
    //  had compatibility mappings to kf + sf. These mappings were
    //  removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
    // That is, the following test is obsolete as of 2.1.9

//obsolete-    // With Full decomposition, it should go all the way down to
//obsolete-    // conjoining Jamo characters.
//obsolete-    //
//obsolete-    static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
//obsolete-    {
//obsolete-        {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
//obsolete-    };
//obsolete-
//obsolete-    c->setDecomposition(Normalizer::DECOMP_COMPAT);
//obsolete-    compareArray(*c, test2, ARRAY_LENGTH(test2));

    delete c;
}
Ejemplo n.º 9
0
// @bug 4081866
//
// Combining characters in different classes not reordered properly.
//
void CollationRegressionTest::Test4081866(/* char* par */)
{
    // These combining characters are all in different classes,
    // so they should be reordered and the strings should compare as equal.
    static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
    static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};

    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    c->setStrength(Collator::TERTIARY);

    // Now that the default collators are set to NO_DECOMPOSITION
    // (as a result of fixing bug 4114077), we must set it explicitly
    // when we're testing reordering behavior.  -- lwerner, 5/5/98
    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);

    if (c->compare(s1,s2) != 0)
    {
        errln("Combining chars were not reordered");
    }

    delete c;
}
Ejemplo n.º 10
0
// @bug 4059820
//
// RuleBasedCollator.getRules does not return the exact pattern as input
// for expanding character sequences
//
void CollationRegressionTest::Test4059820(/* char* par */)
{
    UErrorCode status = U_ZERO_ERROR;

    RuleBasedCollator *c = NULL;
    UnicodeString rules = "&9 < a < b , c/a < d < z";

    c = new RuleBasedCollator(rules, status);

    if (c == NULL || U_FAILURE(status))
    {
        errln("Failure building a collator.");
        delete c;
        return;
    }

    if ( c->getRules().indexOf("c/a") == -1)
    {
        errln("returned rules do not contain 'c/a'");
    }

    delete c;
}
Ejemplo n.º 11
0
void CollationIteratorTest::TestMaxExpansion(/* char* par */)
{
    UErrorCode          status = U_ZERO_ERROR; 
    UnicodeString rule("&a < ab < c/aba < d < z < ch");
    RuleBasedCollator  *coll   = new RuleBasedCollator(rule, status);
    UChar               ch     = 0;
    UnicodeString       str(ch);

    CollationElementIterator *iter   = coll->createCollationElementIterator(str);

    while (ch < 0xFFFF && U_SUCCESS(status)) {
        int      count = 1;
        uint32_t order;
        ch ++;
        UnicodeString str(ch);
        iter->setText(str, status);
        order = iter->previous(status);

        /* thai management */
        if (CollationElementIterator::isIgnorable(order))
            order = iter->previous(status);

        while (U_SUCCESS(status)
            && iter->previous(status) != (int32_t)UCOL_NULLORDER)
        {
            count ++; 
        }

        if (U_FAILURE(status) && iter->getMaxExpansion(order) < count) {
            errln("Failure at codepoint %d, maximum expansion count < %d\n",
                ch, count);
        }
    }

    delete iter;
    delete coll;
}
Ejemplo n.º 12
0
// @bug 4054734
//
// Collator::IDENTICAL documented but not implemented
//
void CollationRegressionTest::Test4054734(/* char* par */)
{
    /*
        Here's the original Java:

        String[] decomp = {
            "\u0001",   "<",    "\u0002",
            "\u0001",   "=",    "\u0001",
            "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
            "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
        };

        String[] nodecomp = {
            "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
        };
    */

    static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0x0001, 0},      {0x3c, 0}, {0x0002, 0},
        {0x0001, 0},      {0x3d, 0}, {0x0001, 0},
        {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
        {0x00c0, 0},      {0x3d, 0}, {0x41, 0x0300, 0}
    };


    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();

    c->setStrength(Collator::IDENTICAL);

    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    compareArray(*c, decomp, ARRAY_LENGTH(decomp));

    delete c;
}
Ejemplo n.º 13
0
void CollationIteratorTest::TestClearBuffers(/* char* par */)
{
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *c = new RuleBasedCollator((UnicodeString)"&a < b < c & ab = d", status);

    if (c == NULL || U_FAILURE(status))
    {
        errln("Couldn't create a RuleBasedCollator.");
        delete c;
        return;
    }

    UnicodeString source("abcd");
    CollationElementIterator *i = c->createCollationElementIterator(source);
    int32_t e0 = i->next(status);    // save the first collation element

    if (U_FAILURE(status))
    {
        errln("call to i->next() failed. err=%s", u_errorName(status));
    }
    else
    {
        i->setOffset(3, status);        // go to the expanding character

        if (U_FAILURE(status))
        {
            errln("call to i->setOffset(3) failed. err=%s", u_errorName(status));
        }
        else
        {
            i->next(status);                // but only use up half of it

            if (U_FAILURE(status))
            {
                errln("call to i->next() failed. err=%s", u_errorName(status));
            }
            else
            {
                i->setOffset(0, status);        // go back to the beginning

                if (U_FAILURE(status))
                {
                    errln("call to i->setOffset(0) failed. err=%s", u_errorName(status));
                }
                else
                {
                    int32_t e = i->next(status);    // and get this one again

                    if (U_FAILURE(status))
                    {
                        errln("call to i->next() failed. err=%s", u_errorName(status));
                    }
                    else if (e != e0)
                    {
                        errln("got 0x%X, expected 0x%X", e, e0);
                    }
                }
            }
        }
    }

    delete i;
    delete c;
}
Ejemplo n.º 14
0
void SSearchTest::offsetTest()
{
    const char *test[] = {
        // The sequence \u0FB3\u0F71\u0F71\u0F80 contains a discontiguous
        // contraction (\u0FB3\u0F71\u0F80) logically followed by \u0F71.
        "\\u1E33\\u0FB3\\u0F71\\u0F71\\u0F80\\uD835\\uDF6C\\u01B0",

        "\\ua191\\u16ef\\u2036\\u017a",

#if 0
        // This results in a complex interaction between contraction,
        // expansion and normalization that confuses the backwards offset fixups.
        "\\u0F7F\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85",
#endif

        "\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85",
        "\\u07E9\\u07EA\\u07F1\\u07F2\\u07F3",

        "\\u02FE\\u02FF"
        "\\u0300\\u0301\\u0302\\u0303\\u0304\\u0305\\u0306\\u0307\\u0308\\u0309\\u030A\\u030B\\u030C\\u030D\\u030E\\u030F"
        "\\u0310\\u0311\\u0312\\u0313\\u0314\\u0315\\u0316\\u0317\\u0318\\u0319\\u031A\\u031B\\u031C\\u031D\\u031E\\u031F"
        "\\u0320\\u0321\\u0322\\u0323\\u0324\\u0325\\u0326\\u0327\\u0328\\u0329\\u032A\\u032B\\u032C\\u032D\\u032E\\u032F"
        "\\u0330\\u0331\\u0332\\u0333\\u0334\\u0335\\u0336\\u0337\\u0338\\u0339\\u033A\\u033B\\u033C\\u033D\\u033E\\u033F"
        "\\u0340\\u0341\\u0342\\u0343\\u0344\\u0345\\u0346\\u0347\\u0348\\u0349\\u034A\\u034B\\u034C\\u034D\\u034E", // currently not working, see #8081

        "\\u02FE\\u02FF\\u0300\\u0301\\u0302\\u0303\\u0316\\u0317\\u0318", // currently not working, see #8081
        "a\\u02FF\\u0301\\u0316", // currently not working, see #8081
        "a\\u02FF\\u0316\\u0301",
        "a\\u0430\\u0301\\u0316",
        "a\\u0430\\u0316\\u0301",
        "abc\\u0E41\\u0301\\u0316",
        "abc\\u0E41\\u0316\\u0301",
        "\\u0E41\\u0301\\u0316",
        "\\u0E41\\u0316\\u0301",
        "a\\u0301\\u0316",
        "a\\u0316\\u0301",
        "\\uAC52\\uAC53",
        "\\u34CA\\u34CB",
        "\\u11ED\\u11EE",
        "\\u30C3\\u30D0",
        "p\\u00E9ch\\u00E9",
        "a\\u0301\\u0325",
        "a\\u0300\\u0325",
        "a\\u0325\\u0300",
        "A\\u0323\\u0300B",
        "A\\u0300\\u0323B",
        "A\\u0301\\u0323B",
        "A\\u0302\\u0301\\u0323B",
        "abc",
        "ab\\u0300c",
        "ab\\u0300\\u0323c",
        " \\uD800\\uDC00\\uDC00",
        "a\\uD800\\uDC00\\uDC00",
        "A\\u0301\\u0301",
        "A\\u0301\\u0323",
        "A\\u0301\\u0323B",
        "B\\u0301\\u0323C",
        "A\\u0300\\u0323B",
        "\\u0301A\\u0301\\u0301",
        "abcd\\r\\u0301",
        "p\\u00EAche",
        "pe\\u0302che",
    };

    int32_t testCount = ARRAY_SIZE(test);
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *col = (RuleBasedCollator *) Collator::createInstance(Locale::getEnglish(), status);
    if (U_FAILURE(status)) {
        errcheckln(status, "Failed to create collator in offsetTest! - %s", u_errorName(status));
        return;
    }
    char buffer[4096];  // A bit of a hack... just happens to be long enough for all the test cases...
                        // We could allocate one that's the right size by (CE_count * 10) + 2
                        // 10 chars is enough room for 8 hex digits plus ", ". 2 extra chars for "[" and "]"

    col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);

    for(int32_t i = 0; i < testCount; i += 1) {
      if (i>=4 && i<=6 && logKnownIssue("9156", "was 8081")) {
            continue; // timebomb until ticket #9156 (was #8081) is resolved
        }
        UnicodeString ts = CharsToUnicodeString(test[i]);
        CollationElementIterator *iter = col->createCollationElementIterator(ts);
        OrderList forwardList;
        OrderList backwardList;
        int32_t order, low, high;

        do {
            low   = iter->getOffset();
            order = iter->next(status);
            high  = iter->getOffset();

            forwardList.add(order, low, high);
        } while (order != CollationElementIterator::NULLORDER);

        iter->reset();
        iter->setOffset(ts.length(), status);

        backwardList.add(CollationElementIterator::NULLORDER, iter->getOffset(), iter->getOffset());

        do {
            high  = iter->getOffset();
            order = iter->previous(status);
            low   = iter->getOffset();

            if (order == CollationElementIterator::NULLORDER) {
                break;
            }

            backwardList.add(order, low, high);
        } while (TRUE);

        backwardList.reverse();

        if (forwardList.compare(backwardList)) {
            logln("Works with \"%s\"", test[i]);
            logln("Forward offsets:  [%s]", printOffsets(buffer, forwardList));
//          logln("Backward offsets: [%s]", printOffsets(buffer, backwardList));

            logln("Forward CEs:  [%s]", printOrders(buffer, forwardList));
//          logln("Backward CEs: [%s]", printOrders(buffer, backwardList));

            logln();
        } else {
            errln("Fails with \"%s\"", test[i]);
            infoln("Forward offsets:  [%s]", printOffsets(buffer, forwardList));
            infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList));

            infoln("Forward CEs:  [%s]", printOrders(buffer, forwardList));
            infoln("Backward CEs: [%s]", printOrders(buffer, backwardList));

            infoln();
        }
        delete iter;
    }
    delete col;
}
Ejemplo n.º 15
0
void CollationIteratorTest::TestAssignment()
{
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *coll = 
        (RuleBasedCollator *)Collator::createInstance(status);

    if (coll == NULL || U_FAILURE(status))
    {
        errln("Couldn't create a default collator.");
        return;
    }

    UnicodeString source("abcd");
    CollationElementIterator *iter1 = 
        coll->createCollationElementIterator(source);

    CollationElementIterator iter2 = *iter1;

    if (*iter1 != iter2) {
        errln("Fail collation iterator assignment does not produce the same elements");
    }

    CollationElementIterator iter3(*iter1);

    if (*iter1 != iter3) {
        errln("Fail collation iterator copy constructor does not produce the same elements");
    }

    source = CharsToUnicodeString("a\\u0300\\u0325");
    coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    CollationElementIterator *iter4 
                        = coll->createCollationElementIterator(source);
    CollationElementIterator iter5(*iter4);
    if (*iter4 != iter5) {
        errln("collation iterator assignment does not produce the same elements");
    }
    iter4->next(status);
    if (U_FAILURE(status) || *iter4 == iter5) {
        errln("collation iterator not equal");
    }
    iter5.next(status);
    if (U_FAILURE(status) || *iter4 != iter5) {
        errln("collation iterator equal");
    }
    iter4->next(status);
    if (U_FAILURE(status) || *iter4 == iter5) {
        errln("collation iterator not equal");
    }
    iter5.next(status);
    if (U_FAILURE(status) || *iter4 != iter5) {
        errln("collation iterator equal");
    }
    CollationElementIterator iter6(*iter4);
    if (*iter4 != iter6) {
        errln("collation iterator equal");
    }
    iter4->next(status);
    if (U_FAILURE(status) || *iter4 == iter5) {
        errln("collation iterator not equal");
    }
    iter5.next(status);
    if (U_FAILURE(status) || *iter4 != iter5) {
        errln("collation iterator equal");
    }
    iter4->next(status);
    if (U_FAILURE(status) || *iter4 == iter5) {
        errln("collation iterator not equal");
    }
    iter5.next(status);
    if (U_FAILURE(status) || *iter4 != iter5) {
        errln("collation iterator equal");
    }
    delete iter1;
    delete iter4;
    delete coll;
}
Ejemplo n.º 16
0
//
//  First characters in scripts.
//  Create a UVector whose contents are pointers to UnicodeStrings for the First Characters in each script.
//  The vector is sorted according to this index's collation.
//
//  This code is too slow to use, so for now hard code the data.
//    Hard coded implementation is follows.
//
UVector *AlphabeticIndex::firstStringsInScript(Collator *ruleBasedCollator, UErrorCode &status) {

    if (U_FAILURE(status)) {
        return NULL;
    }

    UnicodeString results[USCRIPT_CODE_LIMIT];
    UnicodeString LOWER_A = UNICODE_STRING_SIMPLE("a");

    UnicodeSetIterator siter(*TO_TRY);
    while (siter.next()) {
        const UnicodeString &current = siter.getString();
        Collator::EComparisonResult r = ruleBasedCollator->compare(current, LOWER_A);
        if (r < 0) {  // TODO fix; we only want "real" script characters, not
                      // symbols.
            continue;
        }

        int script = uscript_getScript(current.char32At(0), &status);
        if (results[script].length() == 0) {
            results[script] = current;
        }
        else if (ruleBasedCollator->compare(current, results[script]) < 0) {
            results[script] = current;
        }
    }

    UnicodeSet extras;
    UnicodeSet expansions;
    RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(ruleBasedCollator);
    const UCollator *uRuleBasedCollator = rbc->getUCollator();
    ucol_getContractionsAndExpansions(uRuleBasedCollator, extras.toUSet(), expansions.toUSet(), true, &status);
    extras.addAll(expansions).removeAll(*TO_TRY);
    if (extras.size() != 0) {
        const Normalizer2 *normalizer = Normalizer2::getNFKCInstance(status);
        UnicodeSetIterator extrasIter(extras);
        while (extrasIter.next()) {
            const UnicodeString &current = extrasIter.next();
            if (!TO_TRY->containsAll(current))
                continue;
            if (!normalizer->isNormalized(current, status) ||
                ruleBasedCollator->compare(current, LOWER_A) < 0) {
                continue;
            }
            int script = uscript_getScript(current.char32At(0), &status);
            if (results[script].length() == 0) {
                results[script] = current;
            } else if (ruleBasedCollator->compare(current, results[script]) < 0) {
                results[script] = current;
            }
        }
    }

    UVector *dest = new UVector(status);
    dest->setDeleter(uprv_deleteUObject);
    for (uint32_t i = 0; i < sizeof(results) / sizeof(results[0]); ++i) {
        if (results[i].length() > 0) {
            dest->addElement(results[i].clone(), status);
        }
    }
    dest->sortWithUComparator(sortCollateComparator, ruleBasedCollator, status);
    return dest;
}
Ejemplo n.º 17
0
void CollationCurrencyTest::currencyTest(/*char *par*/)
{
    // All the currency symbols, in collation order
    static const UChar currency[][2] =
    {
      { 0x00A4, 0x0000}, /*00A4; L; [14 36, 03, 03]    # [082B.0020.0002] # CURRENCY SIGN*/
      { 0x00A2, 0x0000}, /*00A2; L; [14 38, 03, 03]    # [082C.0020.0002] # CENT SIGN*/
      { 0xFFE0, 0x0000}, /*FFE0; L; [14 38, 03, 05]    # [082C.0020.0003] # FULLWIDTH CENT SIGN*/
      { 0x0024, 0x0000}, /*0024; L; [14 3A, 03, 03]    # [082D.0020.0002] # DOLLAR SIGN*/
      { 0xFF04, 0x0000}, /*FF04; L; [14 3A, 03, 05]    # [082D.0020.0003] # FULLWIDTH DOLLAR SIGN*/
      { 0xFE69, 0x0000}, /*FE69; L; [14 3A, 03, 1D]    # [082D.0020.000F] # SMALL DOLLAR SIGN*/
      { 0x00A3, 0x0000}, /*00A3; L; [14 3C, 03, 03]    # [082E.0020.0002] # POUND SIGN*/
      { 0xFFE1, 0x0000}, /*FFE1; L; [14 3C, 03, 05]    # [082E.0020.0003] # FULLWIDTH POUND SIGN*/
      { 0x00A5, 0x0000}, /*00A5; L; [14 3E, 03, 03]    # [082F.0020.0002] # YEN SIGN*/
      { 0xFFE5, 0x0000}, /*FFE5; L; [14 3E, 03, 05]    # [082F.0020.0003] # FULLWIDTH YEN SIGN*/
      { 0x09F2, 0x0000}, /*09F2; L; [14 40, 03, 03]    # [0830.0020.0002] # BENGALI RUPEE MARK*/
      { 0x09F3, 0x0000}, /*09F3; L; [14 42, 03, 03]    # [0831.0020.0002] # BENGALI RUPEE SIGN*/
      { 0x0E3F, 0x0000}, /*0E3F; L; [14 44, 03, 03]    # [0832.0020.0002] # THAI CURRENCY SYMBOL BAHT*/
      { 0x17DB, 0x0000}, /*17DB; L; [14 46, 03, 03]    # [0833.0020.0002] # KHMER CURRENCY SYMBOL RIEL*/
      { 0x20A0, 0x0000}, /*20A0; L; [14 48, 03, 03]    # [0834.0020.0002] # EURO-CURRENCY SIGN*/
      { 0x20A1, 0x0000}, /*20A1; L; [14 4A, 03, 03]    # [0835.0020.0002] # COLON SIGN*/
      { 0x20A2, 0x0000}, /*20A2; L; [14 4C, 03, 03]    # [0836.0020.0002] # CRUZEIRO SIGN*/
      { 0x20A3, 0x0000}, /*20A3; L; [14 4E, 03, 03]    # [0837.0020.0002] # FRENCH FRANC SIGN*/
      { 0x20A4, 0x0000}, /*20A4; L; [14 50, 03, 03]    # [0838.0020.0002] # LIRA SIGN*/
      { 0x20A5, 0x0000}, /*20A5; L; [14 52, 03, 03]    # [0839.0020.0002] # MILL SIGN*/
      { 0x20A6, 0x0000}, /*20A6; L; [14 54, 03, 03]    # [083A.0020.0002] # NAIRA SIGN*/
      { 0x20A7, 0x0000}, /*20A7; L; [14 56, 03, 03]    # [083B.0020.0002] # PESETA SIGN*/
      { 0x20A9, 0x0000}, /*20A9; L; [14 58, 03, 03]    # [083C.0020.0002] # WON SIGN*/
      { 0xFFE6, 0x0000}, /*FFE6; L; [14 58, 03, 05]    # [083C.0020.0003] # FULLWIDTH WON SIGN*/
      { 0x20AA, 0x0000}, /*20AA; L; [14 5A, 03, 03]    # [083D.0020.0002] # NEW SHEQEL SIGN*/
      { 0x20AB, 0x0000}, /*20AB; L; [14 5C, 03, 03]    # [083E.0020.0002] # DONG SIGN*/
      { 0x20AC, 0x0000}, /*20AC; L; [14 5E, 03, 03]    # [083F.0020.0002] # EURO SIGN*/
      { 0x20AD, 0x0000}, /*20AD; L; [14 60, 03, 03]    # [0840.0020.0002] # KIP SIGN*/
      { 0x20AE, 0x0000}, /*20AE; L; [14 62, 03, 03]    # [0841.0020.0002] # TUGRIK SIGN*/
      { 0x20AF, 0x0000}, /*20AF; L; [14 64, 03, 03]    # [0842.0020.0002] # DRACHMA SIGN*/
    };
    

    uint32_t i, j;
    UErrorCode status = U_ZERO_ERROR;
    Collator::EComparisonResult expectedResult = Collator::EQUAL;
    RuleBasedCollator *c = (RuleBasedCollator *)Collator::createInstance("en_US", status);

    if (U_FAILURE(status))
    {
        errln ("Collator::createInstance() failed!");
        return;
    }

    // Compare each currency symbol against all the
    // currency symbols, including itself
    for (i = 0; i < ARRAY_LENGTH(currency); i += 1)
    {
        for (j = 0; j < ARRAY_LENGTH(currency); j += 1)
        {
            UnicodeString source(currency[i], 1);
            UnicodeString target(currency[j], 1);

            if (i < j)
            {
                expectedResult = Collator::LESS;
            }
            else if ( i == j)
            {
                expectedResult = Collator::EQUAL;
            }
            else
            {
                expectedResult = Collator::GREATER;
            }

            Collator::EComparisonResult compareResult = c->compare(source, target);

            CollationKey sourceKey, targetKey;
            UErrorCode status = U_ZERO_ERROR;

            c->getCollationKey(source, sourceKey, status);

            if (U_FAILURE(status))
            {
                errln("Couldn't get collationKey for source");
                continue;
            }

            c->getCollationKey(target, targetKey, status);

            if (U_FAILURE(status))
            {
                errln("Couldn't get collationKey for target");
                continue;
            }

            Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);

            reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );

        }
    }
    delete c;
}
Ejemplo n.º 18
0
My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status)
  : RuleBasedCollator(rbc.getRules(), status)
{
}
Ejemplo n.º 19
0
// @bug 4060154
//
// MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
//
void CollationRegressionTest::Test4060154(/* char* par */)
{
    UErrorCode status = U_ZERO_ERROR;
    UnicodeString rules;

    rules += "&f < g, G < h, H < i, I < j, J";
    rules +=  " & H < ";
    rules += (UChar)0x0131;
    rules += ", ";
    rules += (UChar)0x0130;
    rules += ", i, I";

    RuleBasedCollator *c = NULL;

    c = new RuleBasedCollator(rules, status);

    if (c == NULL || U_FAILURE(status))
    {
        errln("failure building collator.");
        delete c;
        return;
    }

    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);

 /*
    String[] tertiary = {
        "A",        "<",    "B",
        "H",        "<",    "\u0131",
        "H",        "<",    "I",
        "\u0131",   "<",    "\u0130",
        "\u0130",   "<",    "i",
        "\u0130",   ">",    "H",
    };
*/

    static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0x41, 0},    {0x3c, 0}, {0x42, 0},
        {0x48, 0},    {0x3c, 0}, {0x0131, 0},
        {0x48, 0},    {0x3c, 0}, {0x49, 0},
        {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
        {0x0130, 0}, {0x3c, 0}, {0x69, 0},
        {0x0130, 0}, {0x3e, 0}, {0x48, 0}
    };

    c->setStrength(Collator::TERTIARY);
    compareArray(*c, tertiary, ARRAY_LENGTH(tertiary));

    /*
    String[] secondary = {
        "H",        "<",    "I",
        "\u0131",   "=",    "\u0130",
    };
*/
    static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0x48, 0},    {0x3c, 0}, {0x49, 0},
        {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
    };

    c->setStrength(Collator::PRIMARY);
    compareArray(*c, secondary, ARRAY_LENGTH(secondary));

    delete c;
}