Ejemplo n.º 1
0
void CollationThaiTest::TestReordering(void) {
  const char *tests[] = { 
                          "\\u0E41c\\u0301",       "=", "\\u0E41\\u0107", // composition
                          "\\u0E41\\uD835\\uDFCE", "<", "\\u0E41\\uD835\\uDFCF", // supplementaries
                          "\\u0E41\\uD834\\uDD5F", "=", "\\u0E41\\uD834\\uDD58\\uD834\\uDD65", // supplementary composition decomps to supplementary
                          "\\u0E41\\uD87E\\uDC02", "=", "\\u0E41\\u4E41", // supplementary composition decomps to BMP
                          "\\u0E41\\u0301",        "=", "\\u0E41\\u0301", // unsafe (just checking backwards iteration)
                          "\\u0E41\\u0301\\u0316", "=", "\\u0E41\\u0316\\u0301",
                          // after UCA 4.1, the two lines below are not equal anymore do not have equal sign
                          "\\u0e24\\u0e41",        "<", "\\u0e41\\u0e24", // exiting contraction bug
                          "\\u0e3f\\u0e3f\\u0e24\\u0e41", "<", "\\u0e3f\\u0e3f\\u0e41\\u0e24",

                          "abc\\u0E41c\\u0301",       "=", "abc\\u0E41\\u0107", // composition
                          "abc\\u0E41\\uD834\\uDC00", "<", "abc\\u0E41\\uD834\\uDC01", // supplementaries
                          "abc\\u0E41\\uD834\\uDD5F", "=", "abc\\u0E41\\uD834\\uDD58\\uD834\\uDD65", // supplementary composition decomps to supplementary
                          "abc\\u0E41\\uD87E\\uDC02", "=", "abc\\u0E41\\u4E41", // supplementary composition decomps to BMP
                          "abc\\u0E41\\u0301",        "=", "abc\\u0E41\\u0301", // unsafe (just checking backwards iteration)
                          "abc\\u0E41\\u0301\\u0316", "=", "abc\\u0E41\\u0316\\u0301",

                          "\\u0E41c\\u0301abc",       "=", "\\u0E41\\u0107abc", // composition
                          "\\u0E41\\uD834\\uDC00abc", "<", "\\u0E41\\uD834\\uDC01abc", // supplementaries
                          "\\u0E41\\uD834\\uDD5Fabc", "=", "\\u0E41\\uD834\\uDD58\\uD834\\uDD65abc", // supplementary composition decomps to supplementary
                          "\\u0E41\\uD87E\\uDC02abc", "=", "\\u0E41\\u4E41abc", // supplementary composition decomps to BMP
                          "\\u0E41\\u0301abc",        "=", "\\u0E41\\u0301abc", // unsafe (just checking backwards iteration)
                          "\\u0E41\\u0301\\u0316abc", "=", "\\u0E41\\u0316\\u0301abc",

                          "abc\\u0E41c\\u0301abc",       "=", "abc\\u0E41\\u0107abc", // composition
                          "abc\\u0E41\\uD834\\uDC00abc", "<", "abc\\u0E41\\uD834\\uDC01abc", // supplementaries
                          "abc\\u0E41\\uD834\\uDD5Fabc", "=", "abc\\u0E41\\uD834\\uDD58\\uD834\\uDD65abc", // supplementary composition decomps to supplementary
                          "abc\\u0E41\\uD87E\\uDC02abc", "=", "abc\\u0E41\\u4E41abc", // supplementary composition decomps to BMP
                          "abc\\u0E41\\u0301abc",        "=", "abc\\u0E41\\u0301abc", // unsafe (just checking backwards iteration)
                          "abc\\u0E41\\u0301\\u0316abc", "=", "abc\\u0E41\\u0316\\u0301abc",
                        };

  compareArray(*coll, tests, sizeof(tests)/sizeof(tests[0]));
 
  const char *rule = "& c < ab";
  const char *testcontraction[] = { "\\u0E41ab", ">", "\\u0E41c"}; // After UCA 4.1 Thai are normal so won't break a contraction
  UnicodeString rules;
  UErrorCode status = U_ZERO_ERROR;
  parseChars(rules, rule);
  RuleBasedCollator *rcoll = new RuleBasedCollator(rules, status);
  if(U_SUCCESS(status)) {
    compareArray(*rcoll, testcontraction, 3);
    delete rcoll;
  } else {
    errln("Couldn't instantiate collator from rules");
  }

}
Ejemplo n.º 2
0
/**
 * Runs a table of (left, relation, right) comparisons through a
 * secondary-strength clone of the fixture's collator `coll`, then builds a
 * rule-based collator from "& c < ab" and checks one contraction case
 * against it.
 *
 * Each table entry is three consecutive strings: the left operand, one of
 * "<", "=" or ">", and the right operand.
 */
void CollationThaiTest::TestReordering(void) {
  // Until UCA 4.1, the collation code swapped Thai/Lao prevowels with the following consonants,
  // resulting in consonant+prevowel == prevowel+consonant.
  // From UCA 5.0 on, there are order-reversing contractions for prevowel+consonant.
  // From UCA 5.0 until UCA 6.1, there was a tertiary difference between
  // consonant+prevowel and prevowel+consonant.
  // In UCA 6.2, they compare equal again.
  // The test was modified to use a collator with strength=secondary,
  // ignoring possible tertiary differences.
  const char *tests[] = {
    "\\u0E41c\\u0301",       "=", "\\u0E41\\u0107", // composition
    "\\u0E41\\U0001D7CE",    "<", "\\u0E41\\U0001D7CF", // supplementaries
    "\\u0E41\\U0001D15F",    "=", "\\u0E41\\U0001D158\\U0001D165", // supplementary composition decomps to supplementary
    "\\u0E41\\U0002F802",    "=", "\\u0E41\\u4E41", // supplementary composition decomps to BMP
    "\\u0E41\\u0301",        "=", "\\u0E41\\u0301", // unsafe (just checking backwards iteration)
    "\\u0E41\\u0301\\u0316", "=", "\\u0E41\\u0316\\u0301",

    "\\u0e24\\u0e41",        "=", "\\u0e41\\u0e24", // exiting contraction bug
    "\\u0e3f\\u0e3f\\u0e24\\u0e41", "=", "\\u0e3f\\u0e3f\\u0e41\\u0e24",

    "abc\\u0E41c\\u0301",       "=", "abc\\u0E41\\u0107", // composition
    "abc\\u0E41\\U0001D000",    "<", "abc\\u0E41\\U0001D001", // supplementaries
    "abc\\u0E41\\U0001D15F",    "=", "abc\\u0E41\\U0001D158\\U0001D165", // supplementary composition decomps to supplementary
    "abc\\u0E41\\U0002F802",    "=", "abc\\u0E41\\u4E41", // supplementary composition decomps to BMP
    "abc\\u0E41\\u0301",        "=", "abc\\u0E41\\u0301", // unsafe (just checking backwards iteration)
    "abc\\u0E41\\u0301\\u0316", "=", "abc\\u0E41\\u0316\\u0301",

    "\\u0E41c\\u0301abc",       "=", "\\u0E41\\u0107abc", // composition
    "\\u0E41\\U0001D000abc",    "<", "\\u0E41\\U0001D001abc", // supplementaries
    "\\u0E41\\U0001D15Fabc",    "=", "\\u0E41\\U0001D158\\U0001D165abc", // supplementary composition decomps to supplementary
    "\\u0E41\\U0002F802abc",    "=", "\\u0E41\\u4E41abc", // supplementary composition decomps to BMP
    "\\u0E41\\u0301abc",        "=", "\\u0E41\\u0301abc", // unsafe (just checking backwards iteration)
    "\\u0E41\\u0301\\u0316abc", "=", "\\u0E41\\u0316\\u0301abc",

    "abc\\u0E41c\\u0301abc",       "=", "abc\\u0E41\\u0107abc", // composition
    "abc\\u0E41\\U0001D000abc",    "<", "abc\\u0E41\\U0001D001abc", // supplementaries
    "abc\\u0E41\\U0001D15Fabc",    "=", "abc\\u0E41\\U0001D158\\U0001D165abc", // supplementary composition decomps to supplementary
    "abc\\u0E41\\U0002F802abc",    "=", "abc\\u0E41\\u4E41abc", // supplementary composition decomps to BMP
    "abc\\u0E41\\u0301abc",        "=", "abc\\u0E41\\u0301abc", // unsafe (just checking backwards iteration)
    "abc\\u0E41\\u0301\\u0316abc", "=", "abc\\u0E41\\u0316\\u0301abc",
  };

  LocalPointer<Collator> coll2(coll->clone());
  // Guard against a failed clone before calling through the pointer.
  if(coll2.isNull()) {
    errln("Unable to clone the Thai collator");
    return;
  }
  UErrorCode status = U_ZERO_ERROR;
  coll2->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
  if(U_FAILURE(status)) {
    errln("Unable to set the Thai collator clone to secondary strength");
    return;
  }
  compareArray(*coll2, tests, sizeof(tests)/sizeof(tests[0]));
 
  const char *rule = "& c < ab";
  const char *testcontraction[] = { "\\u0E41ab", ">", "\\u0E41c"}; // After UCA 4.1 Thai are normal so won't break a contraction
  UnicodeString rules;
  parseChars(rules, rule);
  LocalPointer<RuleBasedCollator> rcoll(new RuleBasedCollator(rules, status));
  // A null result from `new` would leave `status` untouched, so require a
  // valid pointer as well before dereferencing it.
  if(U_SUCCESS(status) && rcoll.isValid()) {
    compareArray(*rcoll, testcontraction, 3);
  } else {
    errln("Couldn't instantiate collator from rules");
  }

}
Ejemplo n.º 3
0
/**
 * Walks a flat table of (left, relation, right) string triples and hands each
 * pair to doTest() together with the expected comparison result.
 *
 * @param c           Collator passed on to doTest().
 * @param tests       Flat array laid out as left, relation, right, left, ...
 *                    Only the first character of each relation string is
 *                    inspected; it must be '<', '>' or '='.
 * @param testsLength Number of entries in `tests`; expected to be a multiple
 *                    of 3. A trailing incomplete triple is reported and
 *                    skipped rather than read past the end of the array.
 */
void CollationThaiTest::compareArray(Collator& c, const char* tests[],
                                     int32_t testsLength) {
    // Only complete triples are processed: the loop body reads tests[i+1] and
    // tests[i+2], so stopping at `i < testsLength` would overrun the array
    // when the length is not a multiple of 3.
    const int32_t completeLength = testsLength - (testsLength % 3);
    if (completeLength != testsLength) {
        errln("Error: test table length is not a multiple of 3; ignoring the incomplete trailing entry");
    }

    for (int32_t i = 0; i < completeLength; i += 3) {

        Collator::EComparisonResult expect;
        if (tests[i+1][0] == '<') {
          expect = Collator::LESS;
        } else if (tests[i+1][0] == '>') {
          expect = Collator::GREATER;
        } else if (tests[i+1][0] == '=') {
          expect = Collator::EQUAL;
        } else {
            // expect = Integer.decode(tests[i+1]).intValue();
            // An unknown relation aborts the whole table, not just this entry.
            errln((UnicodeString)"Error: unknown operator " + tests[i+1]);
            return;
        }

        UnicodeString s1, s2;
        parseChars(s1, tests[i]);
        parseChars(s2, tests[i+2]);

        doTest(&c, s1, s2, expect);
        // The block below is compiled out; it is an inline variant of the
        // check that compares the strings and then their collation keys.
#if 0
        UErrorCode status = U_ZERO_ERROR;
        int32_t result = c.compare(s1, s2);
        if (sign(result) != sign(expect))
        {
            UnicodeString t1, t2;
            errln(UnicodeString("") +
                  i/3 + ": compare(" + IntlTest::prettify(s1, t1)
                  + " , " + IntlTest::prettify(s2, t2)
                  + ") got " + result + "; expected " + expect);

            CollationKey k1, k2;
            c.getCollationKey(s1, k1, status);
            c.getCollationKey(s2, k2, status);
            if (U_FAILURE(status)) {
                errln((UnicodeString)"Fail: getCollationKey returned " + u_errorName(status));
                return;
            }
            errln((UnicodeString)"  key1: " + prettify(k1, t1) );
            errln((UnicodeString)"  key2: " + prettify(k2, t2) );
        }
        else
        {
            // Collator.compare worked OK; now try the collation keys
            CollationKey k1, k2;
            c.getCollationKey(s1, k1, status);
            c.getCollationKey(s2, k2, status);
            if (U_FAILURE(status)) {
                errln((UnicodeString)"Fail: getCollationKey returned " + u_errorName(status));
                return;
            }

            result = k1.compareTo(k2);
            if (sign(result) != sign(expect)) {
                UnicodeString t1, t2;
                errln(UnicodeString("") +
                      i/3 + ": key(" + IntlTest::prettify(s1, t1)
                      + ").compareTo(key(" + IntlTest::prettify(s2, t2)
                      + ")) got " + result + "; expected " + expect);
                
                errln((UnicodeString)"  " + prettify(k1, t1) + " vs. " + prettify(k2, t2));
            }
        }
#endif
    }
}