// Fills bucketList_ from labels_.
//
// The resulting list is:
//   - one underflow bucket (lower boundary *EMPTY_STRING),
//   - one normal bucket per entry of labels_, in order,
//   - an inflow bucket in front of a label whenever its script set (with
//     IGNORE_SCRIPTS removed) is disjoint from the previous label's and the
//     previous label's overflow comparison string sorts before it under
//     collatorPrimaryOnly_,
//   - one overflow bucket after the last label.
//
// If labels_ is empty only the underflow bucket is added.
//
// @param status  ICU error code. Nothing is done if it is already a failure.
//                Set to U_MEMORY_ALLOCATION_ERROR if a Bucket cannot be
//                allocated; failures reported by the helpers called here are
//                passed through. On failure bucketList_ may be partially
//                filled.
void AlphabeticIndex::buildBucketList(UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }

    // Underflow bucket.
    Bucket *b = new Bucket(getUnderflowLabel(), *EMPTY_STRING, U_ALPHAINDEX_UNDERFLOW, status);
    if (b == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    if (U_SUCCESS(status)) {
        bucketList_->addElement(b, status);
    }
    if (U_FAILURE(status)) {
        // Either the constructor failed or the list did not store the pointer;
        // in both cases the bucket is still ours to free.
        delete b;
        return;
    }

    // Without any labels there is nothing to put between underflow and
    // overflow; elementAt(0) below must not be dereferenced in that case.
    if (labels_->size() == 0) {
        return;
    }

    // Build up the list, adding underflow, additions, overflow
    // insert infix labels as needed, using \uFFFF.
    const UnicodeString *last = static_cast<UnicodeString *>(labels_->elementAt(0));
    b = new Bucket(*last, *last, U_ALPHAINDEX_NORMAL, status);
    if (b == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    if (U_SUCCESS(status)) {
        bucketList_->addElement(b, status);
    }
    if (U_FAILURE(status)) {
        delete b;
        return;
    }

    UnicodeSet lastSet;
    UnicodeSet set;
    AlphabeticIndex::getScriptSet(lastSet, *last, status);
    if (U_FAILURE(status)) {
        return;
    }
    lastSet.removeAll(*IGNORE_SCRIPTS);

    for (int32_t i = 1; i < labels_->size(); ++i) {
        UnicodeString *current = static_cast<UnicodeString *>(labels_->elementAt(i));
        getScriptSet(set, *current, status);
        if (U_FAILURE(status)) {
            return;
        }
        set.removeAll(*IGNORE_SCRIPTS);

        if (lastSet.containsNone(set)) {
            // check for adjacent
            const UnicodeString &overflowComparisonString = getOverflowComparisonString(*last, status);
            if (U_FAILURE(status)) {
                return;
            }
            if (collatorPrimaryOnly_->compare(overflowComparisonString, *current) < 0) {
                // There is a gap between the two scripts: add an inflow bucket.
                // The loop index must NOT be advanced here; doing so made the
                // label following each inflow bucket disappear from the list.
                b = new Bucket(getInflowLabel(), overflowComparisonString, U_ALPHAINDEX_INFLOW, status);
                if (b == NULL) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    return;
                }
                if (U_SUCCESS(status)) {
                    bucketList_->addElement(b, status);
                }
                if (U_FAILURE(status)) {
                    delete b;
                    return;
                }
            }
        }

        // Normal bucket for the current label.
        b = new Bucket(*current, *current, U_ALPHAINDEX_NORMAL, status);
        if (b == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        if (U_SUCCESS(status)) {
            bucketList_->addElement(b, status);
        }
        if (U_FAILURE(status)) {
            delete b;
            return;
        }
        last = current;
        lastSet = set;
    }

    // final overflow bucket
    const UnicodeString &limitString = getOverflowComparisonString(*last, status);
    if (U_FAILURE(status)) {
        return;
    }
    b = new Bucket(getOverflowLabel(), limitString, U_ALPHAINDEX_OVERFLOW, status);
    if (b == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    if (U_SUCCESS(status)) {
        bucketList_->addElement(b, status);
    }
    if (U_FAILURE(status)) {
        delete b;
    }
}
// Creates a new BucketList for the current set of index labels.
//
// Steps:
//   1. initLabels() produces the label strings.
//   2. An underflow bucket is added, then one normal bucket per label, with
//      an inflow bucket inserted whenever one or more whole scripts (as
//      delimited by firstCharsInScripts_) are skipped between two labels.
//   3. Labels with multiple primary weights ("AE-ligature", "Sch", ...) get
//      an additional invisible bucket label+U+FFFF that redirects to the
//      preceding single-primary bucket.
//   4. An overflow bucket is appended; Pinyin buckets (BASE + A-Z) are
//      redirected to the matching or preceding A-Z bucket.
//   5. Inflow buckets that would be displayed next to another inflow or the
//      overflow bucket are merged into it, and a second vector with only the
//      visible buckets is built.
//
// @param errorCode  ICU error code; set to U_MEMORY_ALLOCATION_ERROR when an
//                   allocation fails, other failures are passed through from
//                   the collator and helper calls.
// @return a newly allocated BucketList, or NULL on failure.
BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
    // Initialize indexCharacters.
    UVector indexCharacters(errorCode);
    indexCharacters.setDeleter(uprv_deleteUObject);
    initLabels(indexCharacters, errorCode);
    if (U_FAILURE(errorCode)) {
        return NULL;
    }

    // Variables for hasMultiplePrimaryWeights().
    UVector64 ces(errorCode);
    uint32_t variableTop;
    if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
        variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
    } else {
        variableTop = 0;
    }
    UBool hasInvisibleBuckets = FALSE;

    // Helper arrays for Chinese Pinyin collation, indexed by letter - 'A'.
    // All entries start out NULL.
    Bucket *asciiBuckets[26] = { NULL };
    Bucket *pinyinBuckets[26] = { NULL };
    UBool hasPinyin = FALSE;

    LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode);
    if (U_FAILURE(errorCode)) {
        return NULL;
    }
    bucketList->setDeleter(uprv_deleteUObject);

    // underflow bucket
    Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
    if (bucket == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    bucketList->addElement(bucket, errorCode);
    if (U_FAILURE(errorCode)) {
        // errorCode was a success before the call, so the vector did not
        // store the pointer and the bucket is still ours to free.
        delete bucket;
        return NULL;
    }

    UnicodeString temp;

    // fix up the list, adding underflow, additions, overflow
    // Insert inflow labels as needed.
    int32_t scriptIndex = -1;
    const UnicodeString *scriptUpperBoundary = &emptyString_;
    for (int32_t i = 0; i < indexCharacters.size(); ++i) {
        UnicodeString &current = *getString(indexCharacters, i);

        const UBool crossedBoundary =
            collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0;
        if (U_FAILURE(errorCode)) {
            // A comparison result obtained with a failed errorCode is not
            // meaningful; in particular the boundary scan below would have
            // no reliable way to terminate.
            return NULL;
        }
        if (crossedBoundary) {
            // We crossed the script boundary into a new script.
            const UnicodeString &inflowBoundary = *scriptUpperBoundary;
            UBool skippedScript = FALSE;
            for (;;) {
                scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
                const UBool belowBoundary =
                    collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0;
                if (U_FAILURE(errorCode)) {
                    return NULL;
                }
                if (belowBoundary) {
                    break;
                }
                skippedScript = TRUE;
            }
            if (skippedScript && bucketList->size() > 1) {
                // We are skipping one or more scripts,
                // and we are not just getting out of the underflow label.
                bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
                if (bucket == NULL) {
                    errorCode = U_MEMORY_ALLOCATION_ERROR;
                    return NULL;
                }
                bucketList->addElement(bucket, errorCode);
                if (U_FAILURE(errorCode)) {
                    delete bucket;
                    return NULL;
                }
            }
        }

        // Add a bucket with the current label.
        bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
        if (bucket == NULL) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        bucketList->addElement(bucket, errorCode);
        if (U_FAILURE(errorCode)) {
            delete bucket;
            return NULL;
        }

        // Remember ASCII and Pinyin buckets for Pinyin redirects.
        UChar c;
        if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
            asciiBuckets[c - 0x41] = bucket;
        } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
                0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
            pinyinBuckets[c - 0x41] = bucket;
            hasPinyin = TRUE;
        }

        // Check for multiple primary weights.
        if (!current.startsWith(BASE, BASE_LENGTH) &&
                hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
                                          ces, errorCode) &&
                current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
            // "AE-ligature" or "Sch" etc.
            // Walk backwards from the bucket before the one just added. The
            // walk ends at the latest at the underflow bucket (index 0),
            // whose label type is not NORMAL.
            for (int32_t j = bucketList->size() - 2;; --j) {
                Bucket *singleBucket = getBucket(*bucketList, j);
                if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
                    // There is no single-character bucket since the last
                    // underflow or inflow label.
                    break;
                }
                if (singleBucket->displayBucket_ == NULL &&
                        !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
                                                   singleBucket->lowerBoundary_,
                                                   ces, errorCode)) {
                    if (U_FAILURE(errorCode)) {
                        return NULL;
                    }
                    // Add an invisible bucket that redirects strings greater than the expansion
                    // to the previous single-character bucket.
                    // For example, after ... Q R S Sch we add Sch\uFFFF->S
                    // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
                    bucket = new Bucket(emptyString_,
                                        UnicodeString(current).append(static_cast<UChar>(0xFFFF)),
                                        U_ALPHAINDEX_NORMAL);
                    if (bucket == NULL) {
                        errorCode = U_MEMORY_ALLOCATION_ERROR;
                        return NULL;
                    }
                    bucket->displayBucket_ = singleBucket;
                    bucketList->addElement(bucket, errorCode);
                    if (U_FAILURE(errorCode)) {
                        delete bucket;
                        return NULL;
                    }
                    hasInvisibleBuckets = TRUE;
                    break;
                }
            }
        }
        // Keep the invariant that errorCode is a success at the top of every
        // iteration (hasMultiplePrimaryWeights() may have set it).
        if (U_FAILURE(errorCode)) {
            return NULL;
        }
    }
    if (U_FAILURE(errorCode)) {
        return NULL;
    }

    if (bucketList->size() == 1) {
        // No real labels, show only the underflow label.
        BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
        if (bl == NULL) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        // The BucketList now holds the vector; release it from the LocalPointer.
        bucketList.orphan();
        return bl;
    }

    // overflow bucket
    bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
    if (bucket == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    bucketList->addElement(bucket, errorCode);  // final
    if (U_FAILURE(errorCode)) {
        delete bucket;
        return NULL;
    }

    if (hasPinyin) {
        // Redirect Pinyin buckets.
        // Each Pinyin bucket is displayed as the A-Z bucket for the same
        // letter or, if that one does not exist, the closest preceding one.
        Bucket *asciiBucket = NULL;
        for (int32_t letter = 0; letter < 26; ++letter) {
            if (asciiBuckets[letter] != NULL) {
                asciiBucket = asciiBuckets[letter];
            }
            if (pinyinBuckets[letter] != NULL && asciiBucket != NULL) {
                pinyinBuckets[letter]->displayBucket_ = asciiBucket;
                hasInvisibleBuckets = TRUE;
            }
        }
    }

    if (!hasInvisibleBuckets) {
        // Every bucket is visible: the same vector serves as both lists.
        BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
        if (bl == NULL) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        bucketList.orphan();
        return bl;
    }

    // Merge inflow buckets that are visually adjacent.
    // Iterate backwards: Merge inflow into overflow rather than the other way around.
    int32_t i = bucketList->size() - 1;
    Bucket *nextBucket = getBucket(*bucketList, i);
    while (--i > 0) {
        bucket = getBucket(*bucketList, i);
        if (bucket->displayBucket_ != NULL) {
            continue;  // skip invisible buckets
        }
        if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
            if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
                bucket->displayBucket_ = nextBucket;
                continue;
            }
        }
        nextBucket = bucket;
    }

    LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode);
    if (U_FAILURE(errorCode)) {
        return NULL;
    }
    // Do not call publicBucketList->setDeleter():
    // This vector shares its objects with the bucketList.
    for (int32_t k = 0; k < bucketList->size(); ++k) {
        bucket = getBucket(*bucketList, k);
        if (bucket->displayBucket_ == NULL) {
            publicBucketList->addElement(bucket, errorCode);
        }
    }
    if (U_FAILURE(errorCode)) {
        return NULL;
    }

    BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
    if (bl == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    bucketList.orphan();
    publicBucketList.orphan();
    return bl;
}