예제 #1
0
// Builds an AlphabeticIndex for "zh-u-co-unihan", raises the maximum label
// count, and checks that
//   - the immutable index has at least 216 buckets, and
//   - U+4E5D and U+7527 map to the expected bucket indexes.
// A failure to create the index or a too-small bucket count is reported via
// dataerrln() and ends the test early.
void AlphabeticIndexTest::TestChineseUnihan() {
    UErrorCode status = U_ZERO_ERROR;
    AlphabeticIndex index("zh-u-co-unihan", status);
    if(U_FAILURE(status)) {
        dataerrln("unable to create an AlphabeticIndex for Chinese/unihan: %s", u_errorName(status));
        return;
    }
    index.setMaxLabelCount(500, status);  // ICU 54 default is 99.
    LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status));
    TEST_CHECK_STATUS;
    int32_t bucketCount = immIndex->getBucketCount();
    if(bucketCount < 216) {
        // There should be at least an underflow and overflow label,
        // and one for each of 214 radicals,
        // and maybe additional labels for simplified radicals.
        dataerrln("too few buckets/labels for Chinese/unihan: %d (is zh/unihan data available?)",
                  bucketCount);
        return;
    } else {
        logln("Chinese/unihan has %d buckets/labels", bucketCount);
    }
    // bucketIndex = radical number, adjusted for simplified radicals in lower buckets.
    // The status is checked after each lookup so that a failed call is
    // reported as a status failure rather than only as a wrong bucket index.
    int32_t bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x4e5d), status);
    TEST_CHECK_STATUS;
    assertEquals("getBucketIndex(U+4E5D)", 5, bucketIndex);
    // radical 100, and there is a 90' since Unicode 8
    bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x7527), status);
    TEST_CHECK_STATUS;
    assertEquals("getBucketIndex(U+7527)", 101, bucketIndex);
}
void AlphabeticIndexTest::TestChineseZhuyin() {
    UErrorCode status = U_ZERO_ERROR;
    char loc[100];
    uloc_forLanguageTag("zh-u-co-zhuyin", loc, LENGTHOF(loc), NULL, &status);
    AlphabeticIndex index(loc, status);
    LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status));
    TEST_CHECK_STATUS; 
    assertEquals("getBucketCount()", 38, immIndex->getBucketCount());
    assertEquals("label 1", UnicodeString((UChar)0x3105), immIndex->getBucket(1)->getLabel());
    assertEquals("label 2", UnicodeString((UChar)0x3106), immIndex->getBucket(2)->getLabel());
    assertEquals("label 3", UnicodeString((UChar)0x3107), immIndex->getBucket(3)->getLabel());
    assertEquals("label 4", UnicodeString((UChar)0x3108), immIndex->getBucket(4)->getLabel());
    assertEquals("label 5", UnicodeString((UChar)0x3109), immIndex->getBucket(5)->getLabel());
}
void AlphabeticIndexTest::ManyLocalesTest() {
    UErrorCode status = U_ZERO_ERROR;
    int32_t  lc = 0;

    for (int i=0; ; ++i) {
        status = U_ZERO_ERROR;
        const char *localeName = KEY_LOCALES[i];
        if (localeName[0] == 0) {
            break;
        }
        // std::cout <<  localeName << "  ";
        Locale loc = Locale::createFromName(localeName);
        AlphabeticIndex index(loc, status);
        TEST_CHECK_STATUS;
        lc = index.getBucketCount(status);
        TEST_CHECK_STATUS;
        // std::cout << "getBucketCount() == " << lc << std::endl;

        LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status));
        TEST_CHECK_STATUS;
        TEST_ASSERT(lc == immIndex->getBucketCount());

        assertEquals("initial bucket index", -1, index.getBucketIndex());
        int32_t bucketIndex = 0;
        while (index.nextBucket(status)) {
            TEST_CHECK_STATUS;
            assertEquals("bucket index", bucketIndex, index.getBucketIndex());
            const UnicodeString &label = index.getBucketLabel();
            TEST_ASSERT(label.length()>0);
            // std::string ss;
            // std::cout << ":" << label.toUTF8String(ss);
            const AlphabeticIndex::Bucket *bucket = immIndex->getBucket(bucketIndex);
            TEST_ASSERT(bucket != NULL);
            assertEquals("bucket label vs. immutable: locale=" + UnicodeString(localeName) +
                         " index=" + bucketIndex,
                         label, bucket->getLabel());
            TEST_ASSERT(&label != &bucket->getLabel());  // not the same pointers
            UAlphabeticIndexLabelType labelType = index.getBucketLabelType();
            TEST_ASSERT(labelType == bucket->getLabelType());
            ++bucketIndex;
        }
        // std::cout << ":" << std::endl;

        TEST_ASSERT(immIndex->getBucketCount() == bucketIndex);
        TEST_ASSERT(immIndex->getBucket(-1) == NULL);
        TEST_ASSERT(immIndex->getBucket(bucketIndex) == NULL);
    }
}
void AlphabeticIndexTest::TestJapaneseKanji() {
    UErrorCode status = U_ZERO_ERROR;
    AlphabeticIndex index(Locale::getJapanese(), status);
    LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status));
    TEST_CHECK_STATUS;
    // There are no index characters for Kanji in the Japanese standard collator.
    // They should all go into the overflow bucket.
    static const UChar32 kanji[] = { 0x4E9C, 0x95C7, 0x4E00, 0x58F1 };
    int32_t overflowIndex = immIndex->getBucketCount() - 1;
    for(int32_t i = 0; i < LENGTHOF(kanji); ++i) {
        char msg[40];
        sprintf(msg, "kanji[%d]=U+%04lX in overflow bucket", (int)i, (long)kanji[i]);
        assertEquals(msg, overflowIndex, immIndex->getBucketIndex(UnicodeString(kanji[i]), status));
        TEST_CHECK_STATUS;
    }
}
void AlphabeticIndexTest::TestSchSt() {
    UErrorCode status = U_ZERO_ERROR;
    AlphabeticIndex index(Locale::getGerman(), status);
    index.addLabels(UnicodeSet("[\\u00C6{Sch*}{St*}]", status), status);
    TEST_CHECK_STATUS; 
    // ... A AE-ligature B-R S Sch St T-Z ...
    LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status));
    TEST_CHECK_STATUS; 
    assertEquals("getBucketCount()", 31, index.getBucketCount(status));
    assertEquals("immutable getBucketCount()", 31, immIndex->getBucketCount());
    static const struct TestCase {
        const char *name;
        int32_t bucketIndex;
        const char *bucketLabel;
    } testCases[] = {
        // name, bucket index, bucket label
        { "Adelbert", 1, "A" },
        { "Afrika", 1, "A" },
        { "\\u00C6sculap", 2, "\\u00C6" },
        { "Aesthet", 2, "\\u00C6" },
        { "Berlin", 3, "B" },
        { "Rilke", 19, "R" },
        { "Sacher", 20, "S" },
        { "Seiler", 20, "S" },
        { "Sultan", 20, "S" },
        { "Schiller", 21, "Sch" },
        { "Steiff", 22, "St" },
        { "Thomas", 23, "T" }
    };
    for (int32_t i = 0; i < LENGTHOF(testCases); ++i) {
        const TestCase &testCase = testCases[i];
        UnicodeString name = UnicodeString(testCase.name).unescape();
        UnicodeString label = UnicodeString(testCase.bucketLabel).unescape();
        char msg[100];
        sprintf(msg, "getBucketIndex(%s)", testCase.name);
        assertEquals(msg, testCase.bucketIndex, index.getBucketIndex(name, status));
        sprintf(msg, "immutable getBucketIndex(%s)", testCase.name);
        assertEquals(msg, testCase.bucketIndex, immIndex->getBucketIndex(name, status));
        sprintf(msg, "immutable bucket label (%s)", testCase.name);
        assertEquals(msg, label, immIndex->getBucket(testCase.bucketIndex)->getLabel());
    }
}
void AlphabeticIndexTest::TestIndexCharactersList() {
    UErrorCode status = U_ZERO_ERROR;
    for (int32_t i = 0; i < LENGTHOF(localeAndIndexCharactersLists); ++i) {
        const char *(&localeAndIndexCharacters)[2] = localeAndIndexCharactersLists[i];
        const char *locale = localeAndIndexCharacters[0];
        UnicodeString expectedIndexCharacters
            = (UnicodeString("\\u2026:") + localeAndIndexCharacters[1] + ":\\u2026").unescape();
        AlphabeticIndex index(locale, status);
        TEST_CHECK_STATUS;
        LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status));
        TEST_CHECK_STATUS;

        // Join the elements of the list to a string with delimiter ":"
        UnicodeString actualIndexCharacters;
        assertEquals(locale,
                     expectedIndexCharacters,
                     joinLabelsAndAppend(*immIndex, actualIndexCharacters));
        logln(locale + UnicodeString(": ") + actualIndexCharacters);
    }
}