void AlphabeticIndexTest::TestChineseUnihan() { UErrorCode status = U_ZERO_ERROR; AlphabeticIndex index("zh-u-co-unihan", status); if(U_FAILURE(status)) { dataerrln("unable create an AlphabeticIndex for Chinese/unihan: %s", u_errorName(status)); return; } index.setMaxLabelCount(500, status); // ICU 54 default is 99. LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status)); TEST_CHECK_STATUS; int32_t bucketCount = immIndex->getBucketCount(); if(bucketCount < 216) { // There should be at least an underflow and overflow label, // and one for each of 214 radicals, // and maybe additional labels for simplified radicals. dataerrln("too few buckets/labels for Chinese/unihan: %d (is zh/unihan data available?)", bucketCount); return; } else { logln("Chinese/unihan has %d buckets/labels", bucketCount); } // bucketIndex = radical number, adjusted for simplified radicals in lower buckets. int32_t bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x4e5d), status); assertEquals("getBucketIndex(U+4E5D)", 5, bucketIndex); // radical 100, and there is a 90' since Unicode 8 bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x7527), status); assertEquals("getBucketIndex(U+7527)", 101, bucketIndex); }
void AlphabeticIndexTest::TestChineseZhuyin() { UErrorCode status = U_ZERO_ERROR; char loc[100]; uloc_forLanguageTag("zh-u-co-zhuyin", loc, LENGTHOF(loc), NULL, &status); AlphabeticIndex index(loc, status); LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status)); TEST_CHECK_STATUS; assertEquals("getBucketCount()", 38, immIndex->getBucketCount()); assertEquals("label 1", UnicodeString((UChar)0x3105), immIndex->getBucket(1)->getLabel()); assertEquals("label 2", UnicodeString((UChar)0x3106), immIndex->getBucket(2)->getLabel()); assertEquals("label 3", UnicodeString((UChar)0x3107), immIndex->getBucket(3)->getLabel()); assertEquals("label 4", UnicodeString((UChar)0x3108), immIndex->getBucket(4)->getLabel()); assertEquals("label 5", UnicodeString((UChar)0x3109), immIndex->getBucket(5)->getLabel()); }
void AlphabeticIndexTest::ManyLocalesTest() { UErrorCode status = U_ZERO_ERROR; int32_t lc = 0; for (int i=0; ; ++i) { status = U_ZERO_ERROR; const char *localeName = KEY_LOCALES[i]; if (localeName[0] == 0) { break; } // std::cout << localeName << " "; Locale loc = Locale::createFromName(localeName); AlphabeticIndex index(loc, status); TEST_CHECK_STATUS; lc = index.getBucketCount(status); TEST_CHECK_STATUS; // std::cout << "getBucketCount() == " << lc << std::endl; LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status)); TEST_CHECK_STATUS; TEST_ASSERT(lc == immIndex->getBucketCount()); assertEquals("initial bucket index", -1, index.getBucketIndex()); int32_t bucketIndex = 0; while (index.nextBucket(status)) { TEST_CHECK_STATUS; assertEquals("bucket index", bucketIndex, index.getBucketIndex()); const UnicodeString &label = index.getBucketLabel(); TEST_ASSERT(label.length()>0); // std::string ss; // std::cout << ":" << label.toUTF8String(ss); const AlphabeticIndex::Bucket *bucket = immIndex->getBucket(bucketIndex); TEST_ASSERT(bucket != NULL); assertEquals("bucket label vs. immutable: locale=" + UnicodeString(localeName) + " index=" + bucketIndex, label, bucket->getLabel()); TEST_ASSERT(&label != &bucket->getLabel()); // not the same pointers UAlphabeticIndexLabelType labelType = index.getBucketLabelType(); TEST_ASSERT(labelType == bucket->getLabelType()); ++bucketIndex; } // std::cout << ":" << std::endl; TEST_ASSERT(immIndex->getBucketCount() == bucketIndex); TEST_ASSERT(immIndex->getBucket(-1) == NULL); TEST_ASSERT(immIndex->getBucket(bucketIndex) == NULL); } }
void AlphabeticIndexTest::TestJapaneseKanji() { UErrorCode status = U_ZERO_ERROR; AlphabeticIndex index(Locale::getJapanese(), status); LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status)); TEST_CHECK_STATUS; // There are no index characters for Kanji in the Japanese standard collator. // They should all go into the overflow bucket. static const UChar32 kanji[] = { 0x4E9C, 0x95C7, 0x4E00, 0x58F1 }; int32_t overflowIndex = immIndex->getBucketCount() - 1; for(int32_t i = 0; i < LENGTHOF(kanji); ++i) { char msg[40]; sprintf(msg, "kanji[%d]=U+%04lX in overflow bucket", (int)i, (long)kanji[i]); assertEquals(msg, overflowIndex, immIndex->getBucketIndex(UnicodeString(kanji[i]), status)); TEST_CHECK_STATUS; } }
void AlphabeticIndexTest::TestSchSt() { UErrorCode status = U_ZERO_ERROR; AlphabeticIndex index(Locale::getGerman(), status); index.addLabels(UnicodeSet("[\\u00C6{Sch*}{St*}]", status), status); TEST_CHECK_STATUS; // ... A AE-ligature B-R S Sch St T-Z ... LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status)); TEST_CHECK_STATUS; assertEquals("getBucketCount()", 31, index.getBucketCount(status)); assertEquals("immutable getBucketCount()", 31, immIndex->getBucketCount()); static const struct TestCase { const char *name; int32_t bucketIndex; const char *bucketLabel; } testCases[] = { // name, bucket index, bucket label { "Adelbert", 1, "A" }, { "Afrika", 1, "A" }, { "\\u00C6sculap", 2, "\\u00C6" }, { "Aesthet", 2, "\\u00C6" }, { "Berlin", 3, "B" }, { "Rilke", 19, "R" }, { "Sacher", 20, "S" }, { "Seiler", 20, "S" }, { "Sultan", 20, "S" }, { "Schiller", 21, "Sch" }, { "Steiff", 22, "St" }, { "Thomas", 23, "T" } }; for (int32_t i = 0; i < LENGTHOF(testCases); ++i) { const TestCase &testCase = testCases[i]; UnicodeString name = UnicodeString(testCase.name).unescape(); UnicodeString label = UnicodeString(testCase.bucketLabel).unescape(); char msg[100]; sprintf(msg, "getBucketIndex(%s)", testCase.name); assertEquals(msg, testCase.bucketIndex, index.getBucketIndex(name, status)); sprintf(msg, "immutable getBucketIndex(%s)", testCase.name); assertEquals(msg, testCase.bucketIndex, immIndex->getBucketIndex(name, status)); sprintf(msg, "immutable bucket label (%s)", testCase.name); assertEquals(msg, label, immIndex->getBucket(testCase.bucketIndex)->getLabel()); } }
void AlphabeticIndexTest::TestIndexCharactersList() { UErrorCode status = U_ZERO_ERROR; for (int32_t i = 0; i < LENGTHOF(localeAndIndexCharactersLists); ++i) { const char *(&localeAndIndexCharacters)[2] = localeAndIndexCharactersLists[i]; const char *locale = localeAndIndexCharacters[0]; UnicodeString expectedIndexCharacters = (UnicodeString("\\u2026:") + localeAndIndexCharacters[1] + ":\\u2026").unescape(); AlphabeticIndex index(locale, status); TEST_CHECK_STATUS; LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status)); TEST_CHECK_STATUS; // Join the elements of the list to a string with delimiter ":" UnicodeString actualIndexCharacters; assertEquals(locale, expectedIndexCharacters, joinLabelsAndAppend(*immIndex, actualIndexCharacters)); logln(locale + UnicodeString(": ") + actualIndexCharacters); } }