示例#1
0
void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
    if (U_FAILURE(status)) { return; }
    if (locale == NULL && collator_ == NULL) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    initialLabels_         = new UnicodeSet();
    if (initialLabels_ == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
    overflowLabel_ = inflowLabel_;
    underflowLabel_ = inflowLabel_;

    if (collator_ == NULL) {
        Collator *coll = Collator::createInstance(*locale, status);
        if (U_FAILURE(status)) {
            delete coll;
            return;
        }
        if (coll == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        collator_ = dynamic_cast<RuleBasedCollator *>(coll);
        if (collator_ == NULL) {
            delete coll;
            status = U_UNSUPPORTED_ERROR;
            return;
        }
    }
    collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
    if (collatorPrimaryOnly_ == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
    firstCharsInScripts_ = firstStringsInScript(status);
    if (U_FAILURE(status)) { return; }
    firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
    // Guard against a degenerate collator where
    // some script boundary strings are primary ignorable.
    for (;;) {
        if (U_FAILURE(status)) { return; }
        if (firstCharsInScripts_->isEmpty()) {
            // AlphabeticIndex requires some non-ignorable script boundary strings.
            status = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
        if (collatorPrimaryOnly_->compare(
                *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
                emptyString_, status) == UCOL_EQUAL) {
            firstCharsInScripts_->removeElementAt(0);
        } else {
            break;
        }
    }

    // Chinese index characters, which are specific to each of the several Chinese tailorings,
    // take precedence over the single locale data exemplar set per language.
    if (!addChineseIndexCharacters(status) && locale != NULL) {
        addIndexExemplars(*locale, status);
    }
}
void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
    if (U_FAILURE(status)) { return; }
    // Chinese index characters, which are specific to each of the several Chinese tailorings,
    // take precedence over the single locale data exemplar set per language.
    const char *language = locale.getLanguage();
    if (uprv_strcmp(language, "zh") == 0 || uprv_strcmp(language, "ja") == 0 ||
            uprv_strcmp(language, "ko") == 0) {
        // TODO: This should be done regardless of the language, but it's expensive.
        // We should add a Collator function (can be @internal)
        // to enumerate just the contractions that start with a given code point or string.
        if (addChineseIndexCharacters(status) || U_FAILURE(status)) {
            return;
        }
    }

    LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
    if (U_FAILURE(status)) {
        return;
    }

    UnicodeSet exemplars;
    ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
    if (U_SUCCESS(status)) {
        initialLabels_->addAll(exemplars);
        return;
    }
    status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR

    // The locale data did not include explicit Index characters.
    // Synthesize a set of them from the locale's standard exemplar characters.
    ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
    if (U_FAILURE(status)) {
        return;
    }

    // question: should we add auxiliary exemplars?
    if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
        exemplars.add(0x61, 0x7A);
    }
    if (exemplars.containsSome(0xAC00, 0xD7A3)) {  // Hangul syllables
        // cut down to small list
        exemplars.remove(0xAC00, 0xD7A3).
            add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
            add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
            add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
            add(0xD30C).add(0xD558);
    }
    if (exemplars.containsSome(0x1200, 0x137F)) {  // Ethiopic block
        // cut down to small list
        // make use of the fact that Ethiopic is allocated in 8's, where
        // the base is 0 mod 8.
        UnicodeSet ethiopic(
            UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
        UnicodeSetIterator it(ethiopic);
        while (it.next() && !it.isString()) {
            if ((it.getCodepoint() & 0x7) != 0) {
                exemplars.remove(it.getCodepoint());
            }
        }
    }

    // Upper-case any that aren't already so.
    //   (We only do this for synthesized index characters.)
    UnicodeSetIterator it(exemplars);
    UnicodeString upperC;
    while (it.next()) {
        const UnicodeString &exemplarC = it.getString();
        upperC = exemplarC;
        upperC.toUpper(locale);
        initialLabels_->add(upperC);
    }
}