/**
 * Compares every string in s[] with every later string in s[] via doTest(),
 * expecting `result` each time, and runs backAndForth() over a collation
 * element iterator for both strings of each pair.
 *
 * @param coll   collator handed to doTest() and used to open the iterator
 * @param s      array of strings in u_unescape() syntax
 * @param size   number of entries in s
 * @param result comparison result expected for s[i] against s[j] with i < j
 */
void genericOrderingTestWithResult(UCollator *coll, const char * const s[], uint32_t size, UCollationResult result) {
  UChar t1[2048] = {0};
  UChar t2[2048] = {0};
  UCollationElements *iter;
  UErrorCode status = U_ZERO_ERROR;
  uint32_t i = 0, j = 0;

  log_verbose("testing sequence:\n");
  for(i = 0; i < size; i++) {
    log_verbose("%s\n", s[i]);
  }

  iter = ucol_openElements(coll, t1, u_strlen(t1), &status);
  if (U_FAILURE(status)) {
    log_err("Creation of iterator failed\n");
    /* Keep running the comparisons, but never hand a dead iterator to backAndForth(). */
    if (iter != NULL) {
      ucol_closeElements(iter);
    }
    iter = NULL;
    status = U_ZERO_ERROR;
  }

  /* "i + 1 < size" rather than "i < size - 1": size is unsigned, so size == 0 would wrap. */
  for(i = 0; i + 1 < size; i++) {
    for(j = i+1; j < size; j++) {
      u_unescape(s[i], t1, 2048);
      u_unescape(s[j], t2, 2048);
      doTest(coll, t1, t2, result);

      if (iter == NULL) {
        continue;
      }

      /* collation element iterator test over both strings of the pair */
      ucol_setText(iter, t1, u_strlen(t1), &status);
      if (U_SUCCESS(status)) {
        backAndForth(iter);
      }
      ucol_setText(iter, t2, u_strlen(t2), &status);
      if (U_SUCCESS(status)) {
        backAndForth(iter);
      }
      if (U_FAILURE(status)) {
        log_err("Setting iterator text failed: %s\n", u_errorName(status));
        /* ICU calls are no-ops once status holds a failure; reset so later pairs are still tested. */
        status = U_ZERO_ERROR;
      }
    }
  }

  if (iter != NULL) {
    ucol_closeElements(iter);
  }
}
/** * Test for CollationElementIterator previous and next for the whole set of * unicode characters with normalization on. */ static void TestNormalizedUnicodeChar() { UChar source[0x100]; UCollator *th_th; UCollationElements *iter; UErrorCode status = U_ZERO_ERROR; UChar codepoint; UChar *test; /* thai should have normalization on */ th_th = ucol_open("th_TH", &status); if (U_FAILURE(status)){ log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n", myErrorName(status)); return; } for (codepoint = 1; codepoint < 0xFFFE;) { test = source; while (codepoint % 0xFF != 0) { if (u_isdefined(codepoint)) *(test ++) = codepoint; codepoint ++; } if (u_isdefined(codepoint)) *(test ++) = codepoint; if (codepoint != 0xFFFF) codepoint ++; *test = 0; iter=ucol_openElements(th_th, source, u_strlen(source), &status); if(U_FAILURE(status)){ log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", myErrorName(status)); ucol_close(th_th); return; } backAndForth(iter); ucol_closeElements(iter); iter=ucol_openElements(th_th, source, -1, &status); if(U_FAILURE(status)){ log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", myErrorName(status)); ucol_close(th_th); return; } backAndForth(iter); ucol_closeElements(iter); } ucol_close(th_th); }
void CollationIteratorTest::TestUnicodeChar() { CollationElementIterator *iter; UChar codepoint; UnicodeString source; for (codepoint = 1; codepoint < 0xFFFE;) { source.remove(); while (codepoint % 0xFF != 0) { if (u_isdefined(codepoint)) source += codepoint; codepoint ++; } if (u_isdefined(codepoint)) source += codepoint; if (codepoint != 0xFFFF) codepoint ++; iter = en_us->createCollationElementIterator(source); /* A basic test to see if it's working at all */ backAndForth(*iter); delete iter; } }
U_CDECL_END

#define LINES 6

/**
 * Unescapes LINES strings built from U+0E44 and U+0E01, sorts them with
 * qsort() using StrCmp, and reports an error whenever the "th_TH" collator
 * orders an earlier entry of the sorted array greater than a later one.
 * Each sorted string is also walked with backAndForth() through an element
 * iterator created from the member collator `coll`.
 */
void CollationThaiTest::TestInvalidThai(void) {
    const char *tests[LINES] = {
        "\\u0E44\\u0E01\\u0E44\\u0E01",
        "\\u0E44\\u0E01\\u0E01\\u0E44",
        "\\u0E01\\u0E44\\u0E01\\u0E44",
        "\\u0E01\\u0E01\\u0E44\\u0E44",
        "\\u0E44\\u0E44\\u0E01\\u0E01",
        "\\u0E01\\u0E44\\u0E44\\u0E01",
    };

    UChar strings[LINES][20];
    UChar *toSort[LINES];
    UErrorCode coll_status = U_ZERO_ERROR;
    UnicodeString iteratorText;

    thaiColl = ucol_open ("th_TH", &coll_status);
    if (U_FAILURE(coll_status)) {
        errln("Error opening Thai collator: %s", u_errorName(coll_status));
        return;
    }

    CollationElementIterator* c =
        ((RuleBasedCollator *)coll)->createCollationElementIterator( iteratorText );

    // Unescape every test string into its own buffer and register it for sorting.
    const int32_t testCount = (int32_t)(sizeof(tests)/sizeof(tests[0]));
    for (int32_t n = 0; n < testCount; n++) {
        const int32_t len = u_unescape(tests[n], strings[n], 20);
        strings[n][len] = 0;
        toSort[n] = strings[n];
    }

    qsort (toSort, LINES, sizeof (UChar *), StrCmp);

    for (int32_t first = 0; first < LINES; first++) {
        logln("%i", first);

        for (int32_t second = first + 1; second < LINES; second++) {
            const UCollationResult order =
                ucol_strcoll (thaiColl, toSort[first], -1, toSort[second], -1);
            if (order == UCOL_GREATER) {
                // inconsistency ordering found!
                errln("Inconsistent ordering between strings %i and %i", first, second);
            }
        }

        iteratorText.setTo(toSort[first]);
        c->setText(iteratorText, coll_status);
        backAndForth(*c);
    }

    ucol_close(thaiColl);
    delete c;
}
/**
 * Runs doTestVariant() for (source, target) with the expected result and,
 * when the expectation is LESS or GREATER, also for the swapped pair with the
 * mirrored expectation. Afterwards runs backAndForth() over a collation
 * element iterator for source and then for target.
 *
 * Does nothing when col is null.
 *
 * @param col    collator under test; it is cast to RuleBasedCollator to
 *               create the element iterator
 * @param source first string
 * @param target second string
 * @param result expected outcome of comparing source with target
 */
void IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
{
    if (!col) {
        return;
    }

    doTestVariant(col, source, target, result);

    // Check the reverse direction as well when the expectation is asymmetric.
    switch (result) {
    case Collator::LESS:
        doTestVariant(col, target, source, Collator::GREATER);
        break;
    case Collator::GREATER:
        doTestVariant(col, target, source, Collator::LESS);
        break;
    default:
        break;
    }

    UErrorCode status = U_ZERO_ERROR;
    RuleBasedCollator *ruleBased = (RuleBasedCollator *)col;
    LocalPointer<CollationElementIterator> elements(ruleBased->createCollationElementIterator(source));

    logln("Testing iterating source: "+source);
    backAndForth(*elements);

    elements->setText(target, status);
    logln("Testing iterating target: "+target);
    backAndForth(*elements);
}
/** * Test for CollationElementIterator.previous() * * @bug 4108758 - Make sure it works with contracting characters * */ static void TestPrevious() { UCollator *coll=NULL; UChar rule[50]; UChar *source; UCollator *c1, *c2, *c3; UCollationElements *iter; UErrorCode status = U_ZERO_ERROR; UChar test1[50]; UChar test2[50]; u_uastrcpy(test1, "What subset of all possible test cases?"); u_uastrcpy(test2, "has the highest probability of detecting"); coll = ucol_open("en_US", &status); iter=ucol_openElements(coll, test1, u_strlen(test1), &status); log_verbose("English locale testing back and forth\n"); if(U_FAILURE(status)){ log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", myErrorName(status)); ucol_close(coll); return; } /* A basic test to see if it's working at all */ backAndForth(iter); ucol_closeElements(iter); ucol_close(coll); /* Test with a contracting character sequence */ u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH"); c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status); log_verbose("Contraction rule testing back and forth with no normalization\n"); if (c1 == NULL || U_FAILURE(status)) { log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n", myErrorName(status)); return; } source=(UChar*)malloc(sizeof(UChar) * 20); u_uastrcpy(source, "abchdcba"); iter=ucol_openElements(c1, source, u_strlen(source), &status); if(U_FAILURE(status)){ log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", myErrorName(status)); return; } backAndForth(iter); ucol_closeElements(iter); ucol_close(c1); /* Test with an expanding character sequence */ u_uastrcpy(rule, "&a < b < c/abd < d"); c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status); log_verbose("Expansion rule testing back and forth with no normalization\n"); if (c2 == NULL || U_FAILURE(status)) { log_err("Couldn't 
create a RuleBasedCollator with a contracting sequence.\n %s\n", myErrorName(status)); return; } u_uastrcpy(source, "abcd"); iter=ucol_openElements(c2, source, u_strlen(source), &status); if(U_FAILURE(status)){ log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", myErrorName(status)); return; } backAndForth(iter); ucol_closeElements(iter); ucol_close(c2); /* Now try both */ u_uastrcpy(rule, "&a < b < c/aba < d < z < ch"); c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,NULL, &status); log_verbose("Expansion/contraction rule testing back and forth with no normalization\n"); if (c3 == NULL || U_FAILURE(status)) { log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n", myErrorName(status)); return; } u_uastrcpy(source, "abcdbchdc"); iter=ucol_openElements(c3, source, u_strlen(source), &status); if(U_FAILURE(status)){ log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", myErrorName(status)); return; } backAndForth(iter); ucol_closeElements(iter); ucol_close(c3); source[0] = 0x0e41; source[1] = 0x0e02; source[2] = 0x0e41; source[3] = 0x0e02; source[4] = 0x0e27; source[5] = 0x61; source[6] = 0x62; source[7] = 0x63; source[8] = 0; coll = ucol_open("th_TH", &status); log_verbose("Thai locale testing back and forth with normalization\n"); iter=ucol_openElements(coll, source, u_strlen(source), &status); if(U_FAILURE(status)){ log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", myErrorName(status)); return; } backAndForth(iter); ucol_closeElements(iter); ucol_close(coll); /* prev test */ source[0] = 0x0061; source[1] = 0x30CF; source[2] = 0x3099; source[3] = 0x30FC; source[4] = 0; coll = ucol_open("ja_JP", &status); log_verbose("Japanese locale testing back and forth with normalization\n"); iter=ucol_openElements(coll, source, u_strlen(source), &status); if(U_FAILURE(status)){ log_err("ERROR: in 
creation of collation element iterator using ucol_openElements()\n %s\n", myErrorName(status)); return; } backAndForth(iter); ucol_closeElements(iter); ucol_close(coll); free(source); }
/** * Test the incremental normalization */ static void TestNormalization() { UErrorCode status = U_ZERO_ERROR; const char *str = "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315"; UCollator *coll; UChar rule[50]; int rulelen = u_unescape(str, rule, 50); int count = 0; const char *testdata[] = {"\\u1ED9", "o\\u0323\\u0302", "\\u0300\\u0315", "\\u0315\\u0300", "A\\u0300\\u0315B", "A\\u0315\\u0300B", "A\\u0316\\u0315B", "A\\u0315\\u0316B", "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316", "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B", "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"}; int32_t srclen; UChar source[10]; UCollationElements *iter; coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status); ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); if (U_FAILURE(status)){ log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n", myErrorName(status)); return; } srclen = u_unescape(testdata[0], source, 10); iter = ucol_openElements(coll, source, srclen, &status); backAndForth(iter); ucol_closeElements(iter); srclen = u_unescape(testdata[1], source, 10); iter = ucol_openElements(coll, source, srclen, &status); backAndForth(iter); ucol_closeElements(iter); while (count < 12) { srclen = u_unescape(testdata[count], source, 10); iter = ucol_openElements(coll, source, srclen, &status); if (U_FAILURE(status)){ log_err("ERROR: in creation of collator element iterator\n %s\n", myErrorName(status)); return; } backAndForth(iter); ucol_closeElements(iter); iter = ucol_openElements(coll, source, -1, &status); if (U_FAILURE(status)){ log_err("ERROR: in creation of collator element iterator\n %s\n", myErrorName(status)); return; } backAndForth(iter); ucol_closeElements(iter); count ++; } ucol_close(coll); }
/** * Test for CollationElementIterator previous and next for the whole set of * unicode characters. */ static void TestUnicodeChar() { UChar source[0x100]; UCollator *en_us; UCollationElements *iter; UErrorCode status = U_ZERO_ERROR; UChar codepoint; UChar *test; en_us = ucol_open("en_US", &status); if (U_FAILURE(status)){ log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n", myErrorName(status)); return; } for (codepoint = 1; codepoint < 0xFFFE;) { test = source; while (codepoint % 0xFF != 0) { if (u_isdefined(codepoint)) *(test ++) = codepoint; codepoint ++; } if (u_isdefined(codepoint)) *(test ++) = codepoint; if (codepoint != 0xFFFF) codepoint ++; *test = 0; iter=ucol_openElements(en_us, source, u_strlen(source), &status); if(U_FAILURE(status)){ log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", myErrorName(status)); ucol_close(en_us); return; } /* A basic test to see if it's working at all */ log_verbose("codepoint testing %x\n", codepoint); backAndForth(iter); ucol_closeElements(iter); /* null termination test */ iter=ucol_openElements(en_us, source, -1, &status); if(U_FAILURE(status)){ log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", myErrorName(status)); ucol_close(en_us); return; } /* A basic test to see if it's working at all */ backAndForth(iter); ucol_closeElements(iter); } ucol_close(en_us); }
/** * Testing the discontigous contractions */ static void TestDiscontiguos() { const char *rulestr = "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315"; UChar rule[50]; int rulelen = u_unescape(rulestr, rule, 50); const char *src[] = { "ADB", "ADBC", "A\\u0315B", "A\\u0315BC", /* base character blocked */ "XD\\u0300", "XD\\u0300\\u0315", /* non blocking combining character */ "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315", /* blocking combining character */ "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315", /* contraction prefix */ "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315", "X\\u0300\\u031A\\u0315", /* ends not with a contraction character */ "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D", "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D" }; const char *tgt[] = { /* non blocking combining character */ "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC", /* base character blocked */ "X D \\u0300", "X D \\u0300\\u0315", /* non blocking combining character */ "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319", /* blocking combining character */ "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315", /* contraction prefix */ "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319", "X\\u0300 \\u031A \\u0315", /* ends not with a contraction character */ "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D" }; int size = 20; UCollator *coll; UErrorCode status = U_ZERO_ERROR; int count = 0; UCollationElements *iter; UCollationElements *resultiter; coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); iter = ucol_openElements(coll, rule, 1, &status); resultiter = ucol_openElements(coll, rule, 1, &status); if (U_FAILURE(status)) { log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status)); return; } while (count < size) { UChar str[20]; UChar tstr[20]; int strLen = u_unescape(src[count], str, 20); UChar *s; 
ucol_setText(iter, str, strLen, &status); if (U_FAILURE(status)) { log_err("Error opening collation iterator\n"); return; } u_unescape(tgt[count], tstr, 20); s = tstr; log_verbose("count %d\n", count); for (;;) { uint32_t ce; UChar *e = u_strchr(s, 0x20); if (e == 0) { e = u_strchr(s, 0); } ucol_setText(resultiter, s, (int32_t)(e - s), &status); ce = ucol_next(resultiter, &status); if (U_FAILURE(status)) { log_err("Error manipulating collation iterator\n"); return; } while (ce != UCOL_NULLORDER) { if (ce != (uint32_t)ucol_next(iter, &status) || U_FAILURE(status)) { log_err("Discontiguos contraction test mismatch\n"); return; } ce = ucol_next(resultiter, &status); if (U_FAILURE(status)) { log_err("Error getting next collation element\n"); return; } } s = e + 1; if (*e == 0) { break; } } ucol_reset(iter); backAndForth(iter); count ++; } ucol_closeElements(resultiter); ucol_closeElements(iter); ucol_close(coll); }
void CollationIteratorTest::TestPrevious(/* char* par */) { UErrorCode status = U_ZERO_ERROR; CollationElementIterator *iter = en_us->createCollationElementIterator(test1); // A basic test to see if it's working at all backAndForth(*iter); delete iter; // Test with a contracting character sequence UnicodeString source; RuleBasedCollator *c1 = NULL; c1 = new RuleBasedCollator( (UnicodeString)"&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH", status); if (c1 == NULL || U_FAILURE(status)) { errln("Couldn't create a RuleBasedCollator with a contracting sequence."); delete c1; return; } source = "abchdcba"; iter = c1->createCollationElementIterator(source); backAndForth(*iter); delete iter; delete c1; // Test with an expanding character sequence RuleBasedCollator *c2 = NULL; c2 = new RuleBasedCollator((UnicodeString)"&a < b < c/abd < d", status); if (c2 == NULL || U_FAILURE(status)) { errln("Couldn't create a RuleBasedCollator with an expanding sequence."); delete c2; return; } source = "abcd"; iter = c2->createCollationElementIterator(source); backAndForth(*iter); delete iter; delete c2; // Now try both RuleBasedCollator *c3 = NULL; c3 = new RuleBasedCollator((UnicodeString)"&a < b < c/aba < d < z < ch", status); if (c3 == NULL || U_FAILURE(status)) { errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence."); delete c3; return; } source = "abcdbchdc"; iter = c3->createCollationElementIterator(source); backAndForth(*iter); delete iter; delete c3; status=U_ZERO_ERROR; source= CharsToUnicodeString("\\u0e41\\u0e02\\u0e41\\u0e02\\u0e27abc"); Collator *c4 = Collator::createInstance(Locale("th", "TH", ""), status); if(U_FAILURE(status)){ errln("Couldn't create a collator"); } iter = ((RuleBasedCollator*)c4)->createCollationElementIterator(source); backAndForth(*iter); delete iter; delete c4; source= CharsToUnicodeString("\\u0061\\u30CF\\u3099\\u30FC"); Collator *c5 = Collator::createInstance(Locale("ja", "JP", ""), status); iter = 
((RuleBasedCollator*)c5)->createCollationElementIterator(source); if(U_FAILURE(status)){ errln("Couldn't create Japanese collator\n"); } backAndForth(*iter); delete iter; delete c5; }