AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status) {
    init(status);
    if (U_FAILURE(status)) {
        return;
    }
    locale_ = locale;
    langType_ = langTypeFromLocale(locale_);

    collator_ = Collator::createInstance(locale, status);
    if (collator_ != NULL) {
        collatorPrimaryOnly_ = collator_->clone();
    }
    if (collatorPrimaryOnly_ != NULL) {
        collatorPrimaryOnly_->setStrength(Collator::PRIMARY);
    }
    getIndexExemplars(*initialLabels_, locale, status);
    indexBuildRequired_ = TRUE;
    if ((collator_ == NULL || collatorPrimaryOnly_ == NULL) && U_SUCCESS(status)) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
    firstScriptCharacters_ = firstStringsInScript(status);
}
Exemple #2
0
void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
    if (U_FAILURE(status)) { return; }
    if (locale == NULL && collator_ == NULL) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    initialLabels_         = new UnicodeSet();
    if (initialLabels_ == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
    overflowLabel_ = inflowLabel_;
    underflowLabel_ = inflowLabel_;

    if (collator_ == NULL) {
        Collator *coll = Collator::createInstance(*locale, status);
        if (U_FAILURE(status)) {
            delete coll;
            return;
        }
        if (coll == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        collator_ = dynamic_cast<RuleBasedCollator *>(coll);
        if (collator_ == NULL) {
            delete coll;
            status = U_UNSUPPORTED_ERROR;
            return;
        }
    }
    collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
    if (collatorPrimaryOnly_ == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
    firstCharsInScripts_ = firstStringsInScript(status);
    if (U_FAILURE(status)) { return; }
    firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
    // Guard against a degenerate collator where
    // some script boundary strings are primary ignorable.
    for (;;) {
        if (U_FAILURE(status)) { return; }
        if (firstCharsInScripts_->isEmpty()) {
            // AlphabeticIndex requires some non-ignorable script boundary strings.
            status = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
        if (collatorPrimaryOnly_->compare(
                *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
                emptyString_, status) == UCOL_EQUAL) {
            firstCharsInScripts_->removeElementAt(0);
        } else {
            break;
        }
    }

    // Chinese index characters, which are specific to each of the several Chinese tailorings,
    // take precedence over the single locale data exemplar set per language.
    if (!addChineseIndexCharacters(status) && locale != NULL) {
        addIndexExemplars(*locale, status);
    }
}
void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
    if (U_FAILURE(status)) { return; }
    if (locale == NULL && collator_ == NULL) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    initialLabels_         = new UnicodeSet();
    if (initialLabels_ == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
    overflowLabel_ = inflowLabel_;
    underflowLabel_ = inflowLabel_;

    if (collator_ == NULL) {
        collator_ = static_cast<RuleBasedCollator *>(Collator::createInstance(*locale, status));
        if (U_FAILURE(status)) { return; }
        if (collator_ == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
    }
    collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
    if (collatorPrimaryOnly_ == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
    firstCharsInScripts_ = firstStringsInScript(status);
    if (U_FAILURE(status)) { return; }
    firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
    UnicodeString _4E00((UChar)0x4E00);
    UnicodeString _1100((UChar)0x1100);
    UnicodeString _1112((UChar)0x1112);
    if (collatorPrimaryOnly_->compare(_4E00, _1112, status) <= 0 &&
            collatorPrimaryOnly_->compare(_1100, _4E00, status) <= 0) {
        // The standard Korean tailoring sorts Hanja (Han characters)
        // as secondary differences from Hangul syllables.
        // This makes U+4E00 not useful as a Han-script boundary.
        // TODO: This becomes obsolete when the root collator gets
        // reliable script-first-primary mappings.
        int32_t hanIndex = binarySearch(
                *firstCharsInScripts_, _4E00, *collatorPrimaryOnly_);
        if (hanIndex >= 0) {
            firstCharsInScripts_->removeElementAt(hanIndex);
        }
    }
    // Guard against a degenerate collator where
    // some script boundary strings are primary ignorable.
    for (;;) {
        if (U_FAILURE(status)) { return; }
        if (firstCharsInScripts_->isEmpty()) {
            // AlphabeticIndex requires some non-ignorable script boundary strings.
            status = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
        if (collatorPrimaryOnly_->compare(
                *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
                emptyString_, status) == UCOL_EQUAL) {
            firstCharsInScripts_->removeElementAt(0);
        } else {
            break;
        }
    }

    if (locale != NULL) {
        addIndexExemplars(*locale, status);
    }
}