Пример #1
0
// @bug 4114077
//
// Collation with decomposition off doesn't work for Europe
//
void CollationRegressionTest::Test4114077(/* char* par */)
{
    // Ensure that we get the same results with decomposition off
    // as we do with it on....

    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    c->setStrength(Collator::TERTIARY);

    static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0x00C0, 0},                     {0x3d, 0}, {0x41, 0x0300, 0},            // Should be equivalent
        {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
        {0x0204, 0},                     {0x3d, 0}, {0x45, 0x030F, 0},
        {0x01fa, 0},                     {0x3d, 0}, {0x41, 0x030a, 0x0301, 0},    // a-ring-acute -> a-ring, acute
                                                //   -> a, ring, acute
        {0x41, 0x0300, 0x0316, 0},         {0x3c, 0}, {0x41, 0x0316, 0x0300, 0}        // No reordering --> unequal
    };

    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
    compareArray(*c, test1, UPRV_LENGTHOF(test1));

    static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0}      // Reordering --> equal
    };

    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    compareArray(*c, test2, UPRV_LENGTHOF(test2));

    delete c;
}
Пример #2
0
// @bug 4087241
//
// string comparison errors in Scandinavian collators
//
void CollationRegressionTest::Test4087241(/* char* par */)
{
    UErrorCode status = U_ZERO_ERROR;
    Locale da_DK("da", "DK");
    RuleBasedCollator *c = NULL;

    c = (RuleBasedCollator *) Collator::createInstance(da_DK, status);

    if (c == NULL || U_FAILURE(status))
    {
        errln("Failed to create collator for da_DK locale");
        delete c;
        return;
    }

    c->setStrength(Collator::SECONDARY);

    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0x7a, 0},          {0x3c, 0}, {0x00E6, 0},            // z        < ae
        {0x61, 0x0308, 0},  {0x3c, 0}, {0x61, 0x030A, 0},      // a-umlaut < a-ring
        {0x59, 0},          {0x3c, 0}, {0x75, 0x0308, 0},      // Y        < u-umlaut
    };

    compareArray(*c, tests, UPRV_LENGTHOF(tests));

    delete c;
}
Пример #3
0
// @bug 4062418
//
// Secondary/Tertiary comparison incorrect in French Secondary
//
void CollationRegressionTest::Test4062418(/* char* par */)
{
    UErrorCode status = U_ZERO_ERROR;

    RuleBasedCollator *c = NULL;

    c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status);

    if (c == NULL || U_FAILURE(status))
    {
        errln("Failed to create collator for Locale::getCanadaFrench()");
        delete c;
        return;
    }

    c->setStrength(Collator::SECONDARY);

/*
    String[] tests = {
            "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
    };
*/
    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
    };

    compareArray(*c, tests, UPRV_LENGTHOF(tests));

    delete c;
}
Пример #4
0
//
// Checks CollationElementIterator::strengthOrder() against a fixed collation
// order (0x0123ABCD) while the owning collator is switched through PRIMARY,
// SECONDARY and TERTIARY strength:
//   PRIMARY   -> 0x01230000 (upper two bytes kept)
//   SECONDARY -> 0x0123AB00 (upper three bytes kept)
//   TERTIARY  -> the order unchanged
// The strength is changed on the collator after the iterator has been
// created; the iterator is expected to reflect each change.
//
// The test stops at the first failing check. The collator and the iterator
// are released on every exit path.
//
void CollationIteratorTest::TestStrengthOrder()
{
    int order = 0x0123ABCD;

    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *coll = 
        (RuleBasedCollator *)Collator::createInstance(status);
    if (coll == NULL || U_FAILURE(status))
    {
        errln("Couldn't create a default collator.");
        delete coll;    // deleting NULL is a no-op; covers a non-NULL result with a failure status
        return;
    }

    coll->setStrength(Collator::PRIMARY);
    CollationElementIterator *iter = 
        coll->createCollationElementIterator(test1);

    if (iter == NULL) {
        errln("Couldn't create a collation element iterator from default collator");
        delete coll;
        return;
    }

    // Once a check fails the remaining ones are skipped, but control still
    // falls through to the cleanup at the bottom.
    bool passed = true;

    if (iter->strengthOrder(order) != 0x01230000) {
        errln("Strength order for a primary strength collator should be the first 2 bytes");
        passed = false;
    }

    if (passed) {
        coll->setStrength(Collator::SECONDARY);
        if (iter->strengthOrder(order) != 0x0123AB00) {
            errln("Strength order for a secondary strength collator should be the third byte");
            passed = false;
        }
    }

    if (passed) {
        coll->setStrength(Collator::TERTIARY);
        if (iter->strengthOrder(order) != order) {
            errln("Strength order for a tertiary strength collator should be the unmodified order");
        }
    }

    delete iter;
    delete coll;
}
Пример #5
0
// @bug 4087243
//
// CollationKey takes ignorable strings into account when it shouldn't
//
void CollationRegressionTest::Test4087243(/* char* par */)
{
    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    c->setStrength(Collator::TERTIARY);

    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0}    // 1 2 3  =  1 2 3 ctrl-A
    };

    compareArray(*c, tests, ARRAY_LENGTH(tests));

    delete c;
}
Пример #6
0
// @bug 4103436
//
// Collator::compare not handling spaces properly
//
void CollationRegressionTest::Test4103436(/* char* par */)
{
    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    c->setStrength(Collator::TERTIARY);

    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
        {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
    };

    compareArray(*c, tests, ARRAY_LENGTH(tests));

    delete c;
}
Пример #7
0
// @bug 4076676
//
// Bad canonicalization of same-class combining characters
//
void CollationRegressionTest::Test4076676(/* char* par */)
{
    // These combining characters are all in the same class, so they should not
    // be reordered, and they should compare as unequal.
    static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
    static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};

    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    c->setStrength(Collator::TERTIARY);

    if (c->compare(s1,s2) == 0)
    {
        errln("Same-class combining chars were reordered");
    }

    delete c;
}
Пример #8
0
// @bug 4054736
//
// Full Decomposition mode not implemented
//
// Uses a secondary-strength clone of the en_us collator with
// UCOL_NORMALIZATION_MODE turned on and expects the Alef-Lamed ligature
// (U+FB4F) to compare equal to the sequence Alef (U+05D0), Lamed (U+05DC).
//
void CollationRegressionTest::Test4054736(/* char* par */)
{
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();

    c->setStrength(Collator::SECONDARY);
    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);

    // Rows are {left string, relation, right string}. Every token is
    // NUL-terminated explicitly rather than relying on the zero-fill of the
    // fixed-size MAX_TOKEN_LEN slot.
    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC, 0}  // Alef-Lamed vs. Alef, Lamed
    };

    compareArray(*c, tests, ARRAY_LENGTH(tests));

    delete c;
}
Пример #9
0
// @bug 4114076
//
// Collation not Unicode conformant with Hangul syllables
//
void CollationRegressionTest::Test4114076(/* char* par */)
{
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    c->setStrength(Collator::TERTIARY);

    //
    // With Canonical decomposition, Hangul syllables should get decomposed
    // into Jamo, but Jamo characters should not be decomposed into
    // conjoining Jamo
    //
    static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
    };

    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    compareArray(*c, test1, ARRAY_LENGTH(test1));

    // From UTR #15:
    // *In earlier versions of Unicode, jamo characters like ksf
    //  had compatibility mappings to kf + sf. These mappings were
    //  removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
    // That is, the following test is obsolete as of 2.1.9

//obsolete-    // With Full decomposition, it should go all the way down to
//obsolete-    // conjoining Jamo characters.
//obsolete-    //
//obsolete-    static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
//obsolete-    {
//obsolete-        {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
//obsolete-    };
//obsolete-
//obsolete-    c->setDecomposition(Normalizer::DECOMP_COMPAT);
//obsolete-    compareArray(*c, test2, ARRAY_LENGTH(test2));

    delete c;
}
Пример #10
0
// @bug 4066696
//
// French secondary collation checking at the end of compare iteration fails
//
void CollationRegressionTest::Test4066696(/* char* par */)
{
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *c = NULL;

    c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status);

    if (c == NULL || U_FAILURE(status))
    {
        errln("Failure creating collator for Locale::getCanadaFrench()");
        delete c;
        return;
    }

    c->setStrength(Collator::SECONDARY);

/*
    String[] tests = {
        "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
    };

  should be:

    String[] tests = {
        "\u00e0",   ">",     "\u01fa",       // a-grave <  A-ring-acute
    };

*/

    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
    };

    compareArray(*c, tests, ARRAY_LENGTH(tests));

    delete c;
}
Пример #11
0
// @bug 4081866
//
// Combining characters in different classes not reordered properly.
//
void CollationRegressionTest::Test4081866(/* char* par */)
{
    // These combining characters are all in different classes,
    // so they should be reordered and the strings should compare as equal.
    static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
    static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};

    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    c->setStrength(Collator::TERTIARY);

    // Now that the default collators are set to NO_DECOMPOSITION
    // (as a result of fixing bug 4114077), we must set it explicitly
    // when we're testing reordering behavior.  -- lwerner, 5/5/98
    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);

    if (c->compare(s1,s2) != 0)
    {
        errln("Combining chars were not reordered");
    }

    delete c;
}
Пример #12
0
// @bug 4054734
//
// Collator::IDENTICAL documented but not implemented
//
void CollationRegressionTest::Test4054734(/* char* par */)
{
    /*
        Here's the original Java:

        String[] decomp = {
            "\u0001",   "<",    "\u0002",
            "\u0001",   "=",    "\u0001",
            "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
            "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
        };

        String[] nodecomp = {
            "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
        };
    */

    static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0x0001, 0},      {0x3c, 0}, {0x0002, 0},
        {0x0001, 0},      {0x3d, 0}, {0x0001, 0},
        {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
        {0x00c0, 0},      {0x3d, 0}, {0x41, 0x0300, 0}
    };


    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();

    c->setStrength(Collator::IDENTICAL);

    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    compareArray(*c, decomp, ARRAY_LENGTH(decomp));

    delete c;
}
Пример #13
0
// @bug 4060154
//
// MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
//
void CollationRegressionTest::Test4060154(/* char* par */)
{
    UErrorCode status = U_ZERO_ERROR;
    UnicodeString rules;

    rules += "&f < g, G < h, H < i, I < j, J";
    rules +=  " & H < ";
    rules += (UChar)0x0131;
    rules += ", ";
    rules += (UChar)0x0130;
    rules += ", i, I";

    RuleBasedCollator *c = NULL;

    c = new RuleBasedCollator(rules, status);

    if (c == NULL || U_FAILURE(status))
    {
        errln("failure building collator.");
        delete c;
        return;
    }

    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);

 /*
    String[] tertiary = {
        "A",        "<",    "B",
        "H",        "<",    "\u0131",
        "H",        "<",    "I",
        "\u0131",   "<",    "\u0130",
        "\u0130",   "<",    "i",
        "\u0130",   ">",    "H",
    };
*/

    static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0x41, 0},    {0x3c, 0}, {0x42, 0},
        {0x48, 0},    {0x3c, 0}, {0x0131, 0},
        {0x48, 0},    {0x3c, 0}, {0x49, 0},
        {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
        {0x0130, 0}, {0x3c, 0}, {0x69, 0},
        {0x0130, 0}, {0x3e, 0}, {0x48, 0}
    };

    c->setStrength(Collator::TERTIARY);
    compareArray(*c, tertiary, ARRAY_LENGTH(tertiary));

    /*
    String[] secondary = {
        "H",        "<",    "I",
        "\u0131",   "=",    "\u0130",
    };
*/
    static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
    {
        {0x48, 0},    {0x3c, 0}, {0x49, 0},
        {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
    };

    c->setStrength(Collator::PRIMARY);
    compareArray(*c, secondary, ARRAY_LENGTH(secondary));

    delete c;
}