void AlphabeticIndex::buildBucketList(UErrorCode &status) {
    UnicodeString labelStr = getUnderflowLabel();
    Bucket *b = new Bucket(labelStr, *EMPTY_STRING, U_ALPHAINDEX_UNDERFLOW, status);
    bucketList_->addElement(b, status);

    // Build up the list, adding underflow, additions, overflow
    // insert infix labels as needed, using \uFFFF.
    const UnicodeString *last = static_cast<UnicodeString *>(labels_->elementAt(0));
    b = new Bucket(*last, *last, U_ALPHAINDEX_NORMAL, status);
    bucketList_->addElement(b, status);

    UnicodeSet lastSet;
    UnicodeSet set;
    AlphabeticIndex::getScriptSet(lastSet, *last, status);
    lastSet.removeAll(*IGNORE_SCRIPTS);

    for (int i = 1; i < labels_->size(); ++i) {
        UnicodeString *current = static_cast<UnicodeString *>(labels_->elementAt(i));
        getScriptSet(set, *current, status);
        set.removeAll(*IGNORE_SCRIPTS);
        if (lastSet.containsNone(set)) {
            // check for adjacent
            const UnicodeString &overflowComparisonString = getOverflowComparisonString(*last, status);
            if (collatorPrimaryOnly_->compare(overflowComparisonString, *current) < 0) {
                labelStr = getInflowLabel();
                b = new Bucket(labelStr, overflowComparisonString, U_ALPHAINDEX_INFLOW, status);
                bucketList_->addElement(b, status);
                i++;
                lastSet = set;
            }
        }
        b = new Bucket(*current, *current, U_ALPHAINDEX_NORMAL, status);
        bucketList_->addElement(b, status);
        last = current;
        lastSet = set;
    }
    const UnicodeString &limitString = getOverflowComparisonString(*last, status);
    b = new Bucket(getOverflowLabel(), limitString, U_ALPHAINDEX_OVERFLOW, status);
    bucketList_->addElement(b, status);
    // final overflow bucket
}
Exemple #2
0
BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
    // Initialize indexCharacters.
    UVector indexCharacters(errorCode);
    indexCharacters.setDeleter(uprv_deleteUObject);
    initLabels(indexCharacters, errorCode);
    if (U_FAILURE(errorCode)) { return NULL; }

    // Variables for hasMultiplePrimaryWeights().
    UVector64 ces(errorCode);
    uint32_t variableTop;
    if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
        variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
    } else {
        variableTop = 0;
    }
    UBool hasInvisibleBuckets = FALSE;

    // Helper arrays for Chinese Pinyin collation.
    Bucket *asciiBuckets[26] = {
        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
    };
    Bucket *pinyinBuckets[26] = {
        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
    };
    UBool hasPinyin = FALSE;

    LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode);
    if (U_FAILURE(errorCode)) {
        return NULL;
    }
    bucketList->setDeleter(uprv_deleteUObject);

    // underflow bucket
    Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
    if (bucket == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    bucketList->addElement(bucket, errorCode);
    if (U_FAILURE(errorCode)) { return NULL; }

    UnicodeString temp;

    // fix up the list, adding underflow, additions, overflow
    // Insert inflow labels as needed.
    int32_t scriptIndex = -1;
    const UnicodeString *scriptUpperBoundary = &emptyString_;
    for (int32_t i = 0; i < indexCharacters.size(); ++i) {
        UnicodeString &current = *getString(indexCharacters, i);
        if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
            // We crossed the script boundary into a new script.
            const UnicodeString &inflowBoundary = *scriptUpperBoundary;
            UBool skippedScript = FALSE;
            for (;;) {
                scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
                if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
                    break;
                }
                skippedScript = TRUE;
            }
            if (skippedScript && bucketList->size() > 1) {
                // We are skipping one or more scripts,
                // and we are not just getting out of the underflow label.
                bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
                if (bucket == NULL) {
                    errorCode = U_MEMORY_ALLOCATION_ERROR;
                    return NULL;
                }
                bucketList->addElement(bucket, errorCode);
            }
        }
        // Add a bucket with the current label.
        bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
        if (bucket == NULL) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        bucketList->addElement(bucket, errorCode);
        // Remember ASCII and Pinyin buckets for Pinyin redirects.
        UChar c;
        if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
            asciiBuckets[c - 0x41] = bucket;
        } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
                0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
            pinyinBuckets[c - 0x41] = bucket;
            hasPinyin = TRUE;
        }
        // Check for multiple primary weights.
        if (!current.startsWith(BASE, BASE_LENGTH) &&
                hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
                                          ces, errorCode) &&
                current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
            // "AE-ligature" or "Sch" etc.
            for (int32_t i = bucketList->size() - 2;; --i) {
                Bucket *singleBucket = getBucket(*bucketList, i);
                if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
                    // There is no single-character bucket since the last
                    // underflow or inflow label.
                    break;
                }
                if (singleBucket->displayBucket_ == NULL &&
                        !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
                                                   singleBucket->lowerBoundary_,
                                                   ces, errorCode)) {
                    // Add an invisible bucket that redirects strings greater than the expansion
                    // to the previous single-character bucket.
                    // For example, after ... Q R S Sch we add Sch\uFFFF->S
                    // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
                    bucket = new Bucket(emptyString_,
                        UnicodeString(current).append((UChar)0xFFFF),
                        U_ALPHAINDEX_NORMAL);
                    if (bucket == NULL) {
                        errorCode = U_MEMORY_ALLOCATION_ERROR;
                        return NULL;
                    }
                    bucket->displayBucket_ = singleBucket;
                    bucketList->addElement(bucket, errorCode);
                    hasInvisibleBuckets = TRUE;
                    break;
                }
            }
        }
    }
    if (U_FAILURE(errorCode)) { return NULL; }
    if (bucketList->size() == 1) {
        // No real labels, show only the underflow label.
        BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
        if (bl == NULL) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        bucketList.orphan();
        return bl;
    }
    // overflow bucket
    bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
    if (bucket == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    bucketList->addElement(bucket, errorCode); // final

    if (hasPinyin) {
        // Redirect Pinyin buckets.
        Bucket *asciiBucket = NULL;
        for (int32_t i = 0; i < 26; ++i) {
            if (asciiBuckets[i] != NULL) {
                asciiBucket = asciiBuckets[i];
            }
            if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
                pinyinBuckets[i]->displayBucket_ = asciiBucket;
                hasInvisibleBuckets = TRUE;
            }
        }
    }

    if (U_FAILURE(errorCode)) { return NULL; }
    if (!hasInvisibleBuckets) {
        BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
        if (bl == NULL) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        bucketList.orphan();
        return bl;
    }
    // Merge inflow buckets that are visually adjacent.
    // Iterate backwards: Merge inflow into overflow rather than the other way around.
    int32_t i = bucketList->size() - 1;
    Bucket *nextBucket = getBucket(*bucketList, i);
    while (--i > 0) {
        bucket = getBucket(*bucketList, i);
        if (bucket->displayBucket_ != NULL) {
            continue;  // skip invisible buckets
        }
        if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
            if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
                bucket->displayBucket_ = nextBucket;
                continue;
            }
        }
        nextBucket = bucket;
    }

    LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode);
    if (U_FAILURE(errorCode)) {
        return NULL;
    }
    // Do not call publicBucketList->setDeleter():
    // This vector shares its objects with the bucketList.
    for (int32_t i = 0; i < bucketList->size(); ++i) {
        bucket = getBucket(*bucketList, i);
        if (bucket->displayBucket_ == NULL) {
            publicBucketList->addElement(bucket, errorCode);
        }
    }
    if (U_FAILURE(errorCode)) { return NULL; }
    BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
    if (bl == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    bucketList.orphan();
    publicBucketList.orphan();
    return bl;
}