/**
 * Return this IdentifierInfo to an empty state.
 *
 * Resets the required-scripts set and the common-among-alternates set,
 * removes every entry from the script-set hash table, and empties the
 * numerics set.
 *
 * @return a reference to this object, so calls can be chained.
 */
IdentifierInfo &IdentifierInfo::clear() {
    fRequiredScripts->resetAll();
    uhash_removeAll(fScriptSetSet);
    fNumerics->clear();
    fCommonAmongAlternates->resetAll();

    return *this;
}
static void TestOtherAPI(void){ UErrorCode status = U_ZERO_ERROR; UHashtable *hash; /* Use the correct type when cast to void * */ const UChar one[4] = {0x006F, 0x006E, 0x0065, 0}; /* L"one" */ const UChar one2[4] = {0x006F, 0x006E, 0x0065, 0}; /* Get around compiler optimizations */ const UChar two[4] = {0x0074, 0x0077, 0x006F, 0}; /* L"two" */ const UChar two2[4] = {0x0074, 0x0077, 0x006F, 0}; /* L"two" */ const UChar three[6] = {0x0074, 0x0068, 0x0072, 0x0065, 0x0065, 0}; /* L"three" */ const UChar four[6] = {0x0066, 0x006F, 0x0075, 0x0072, 0}; /* L"four" */ const UChar five[6] = {0x0066, 0x0069, 0x0076, 0x0065, 0}; /* L"five" */ const UChar five2[6] = {0x0066, 0x0069, 0x0076, 0x0065, 0}; /* L"five" */ hash = uhash_open(uhash_hashUChars, uhash_compareUChars, &status); if (U_FAILURE(status)) { log_err("FAIL: uhash_open failed with %s and returned 0x%08x\n", u_errorName(status), hash); return; } if (hash == NULL) { log_err("FAIL: uhash_open returned NULL\n"); return; } log_verbose("Ok: uhash_open returned 0x%08X\n", hash); uhash_puti(hash, (void*)one, 1, &status); if(uhash_count(hash) != 1){ log_err("FAIL: uhas_count() failed. Expected: 1, Got: %d\n", uhash_count(hash)); } uhash_puti(hash, (void*)two, 2, &status); uhash_puti(hash, (void*)three, 3, &status); uhash_puti(hash, (void*)four, 4, &status); uhash_puti(hash, (void*)five, 5, &status); if(uhash_count(hash) != 5){ log_err("FAIL: uhas_count() failed. Expected: 5, Got: %d\n", uhash_count(hash)); } if(uhash_geti(hash, (void*)two2) != 2){ log_err("FAIL: uhash_geti failed\n"); } if(uhash_removei(hash, (void*)five2) != 5){ log_err("FAIL: uhash_remove() failed\n"); } if(uhash_count(hash) != 4){ log_err("FAIL: uhas_count() failed. 
Expected: 4, Got: %d\n", uhash_count(hash)); } uhash_put(hash, (void*)one, NULL, &status); if(uhash_count(hash) != 3){ log_err("FAIL: uhash_put() with value=NULL didn't remove the key value pair\n"); } status=U_ILLEGAL_ARGUMENT_ERROR; uhash_puti(hash, (void*)one, 1, &status); if(uhash_count(hash) != 3){ log_err("FAIL: uhash_put() with value!=NULL should fail when status != U_ZERO_ERROR \n"); } status=U_ZERO_ERROR; uhash_puti(hash, (void*)one, 1, &status); if(uhash_count(hash) != 4){ log_err("FAIL: uhash_put() with value!=NULL didn't replace the key value pair\n"); } if(_compareUChars((void*)one, (void*)two) == TRUE || _compareUChars((void*)one, (void*)one) != TRUE || _compareUChars((void*)one, (void*)one2) != TRUE || _compareUChars((void*)one, NULL) == TRUE ) { log_err("FAIL: compareUChars failed\n"); } uhash_removeAll(hash); if(uhash_count(hash) != 0){ log_err("FAIL: uhas_count() failed. Expected: 0, Got: %d\n", uhash_count(hash)); } uhash_setKeyComparator(hash, uhash_compareLong); uhash_setKeyHasher(hash, uhash_hashLong); uhash_iputi(hash, 1001, 1, &status); uhash_iputi(hash, 1002, 2, &status); uhash_iputi(hash, 1003, 3, &status); if(_compareLong(1001, 1002) == TRUE || _compareLong(1001, 1001) != TRUE || _compareLong(1001, 0) == TRUE ) { log_err("FAIL: compareLong failed\n"); } /*set the resize policy to just GROW and SHRINK*/ /*how to test this??*/ uhash_setResizePolicy(hash, U_GROW_AND_SHRINK); uhash_iputi(hash, 1004, 4, &status); uhash_iputi(hash, 1005, 5, &status); uhash_iputi(hash, 1006, 6, &status); if(uhash_count(hash) != 6){ log_err("FAIL: uhash_count() failed. Expected: 6, Got: %d\n", uhash_count(hash)); } if(uhash_iremovei(hash, 1004) != 4){ log_err("FAIL: uhash_remove failed\n"); } if(uhash_iremovei(hash, 1004) != 0){ log_err("FAIL: uhash_remove failed\n"); } uhash_close(hash); }
/**
 * Rebuild the label list and the buckets if a rebuild is pending.
 *
 * Does nothing when status already indicates failure on entry or when
 * indexBuildRequired_ is not set. Otherwise it discards previously built
 * data, derives the label list from initialLabels_ (filtering labels that
 * collate equal to an earlier label, that collate like their separated
 * code points, or that are not alphabetic), thins the list when it exceeds
 * maxLabelCount_, and then builds the buckets and bins the records.
 *
 * @param status  ICU error code, in/out. Set to U_MEMORY_ALLOCATION_ERROR
 *                if a helper object cannot be allocated; in that case, or
 *                if storing into alreadyIn_ fails, the function returns
 *                early and indexBuildRequired_ is left unchanged.
 */
void AlphabeticIndex::buildIndex(UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }
    if (!indexBuildRequired_) {
        return;
    }

    // Discard any already-built data.
    // This is important when the user builds and uses an index, then subsequently modifies it,
    // necessitating a rebuild.

    bucketList_->removeAllElements();
    labels_->removeAllElements();
    uhash_removeAll(alreadyIn_);
    noDistinctSorting_->clear();
    notAlphabetic_->clear();

    // first sort the incoming Labels, with a "best" ordering among items
    // that are the same according to the collator

    UVector preferenceSorting(status);   // Vector of UnicodeStrings; owned by the vector.
    preferenceSorting.setDeleter(uprv_deleteUObject);
    appendUnicodeSetToUVector(preferenceSorting, *initialLabels_, status);
    preferenceSorting.sortWithUComparator(PreferenceComparator, &status, status);

    // We now make a set of Labels.
    // Some of the input may, however, be redundant.
    // That is, we might have c, ch, d, where "ch" sorts just like "c", "h"
    // So we make a pass through, filtering out those cases.
    // TODO: filtering these out would seem to be at odds with the eventual goal
    //       of being able to split buckets that contain too many items.

    UnicodeSet labelSet;
    for (int32_t psIndex=0; psIndex<preferenceSorting.size(); psIndex++) {
        UnicodeString item = *static_cast<const UnicodeString *>(preferenceSorting.elementAt(psIndex));
        // TODO:  Since preferenceSorting was originally populated from the contents of a UnicodeSet,
        //        is it even possible for duplicates to show up in this check?
        if (labelSet.contains(item)) {
            // Find the already-accepted label that this item collates equal to
            // and record the item under it in alreadyIn_.
            UnicodeSetIterator itemAlreadyInIter(labelSet);
            while (itemAlreadyInIter.next()) {
                const UnicodeString &itemAlreadyIn = itemAlreadyInIter.getString();
                if (collatorPrimaryOnly_->compare(item, itemAlreadyIn) == 0) {
                    UnicodeSet *targets = static_cast<UnicodeSet *>(uhash_get(alreadyIn_, &itemAlreadyIn));
                    if (targets == NULL) {
                        // Java equivalent: alreadyIn.put(itemAlreadyIn, targets = new LinkedHashSet<String>());
                        targets = new UnicodeSet();
                        if (targets == NULL) {
                            status = U_MEMORY_ALLOCATION_ERROR;
                            return;
                        }
                        uhash_put(alreadyIn_, itemAlreadyIn.clone(), targets, &status);
                        if (U_FAILURE(status)) {
                            // The table did not take the entry, so targets must not be used.
                            // NOTE(review): presumably alreadyIn_ has a value deleter installed, in
                            // which case uhash_put has already disposed of targets - confirm
                            // against the place where alreadyIn_ is opened.
                            return;
                        }
                    }
                    targets->add(item);
                    break;
                }
            }
        } else if (item.moveIndex32(0, 1) < item.length() &&  // Label contains more than one code point.
                   collatorPrimaryOnly_->compare(item, separated(item)) == 0) {
            noDistinctSorting_->add(item);
        } else if (!ALPHABETIC->containsSome(item)) {
            notAlphabetic_->add(item);
        } else {
            labelSet.add(item);
        }
    }

    // If we have no labels, hard-code a fallback default set of [A-Z]
    // This case can occur with locales that don't have exemplar character data, including root.
    // A no-labels situation will cause other problems; it needs to be avoided.
    if (labelSet.isEmpty()) {
        labelSet.add((UChar32)0x41, (UChar32)0x5A);
    }

    // Move the set of Labels from the set into a vector, and sort
    // according to the collator.

    appendUnicodeSetToUVector(*labels_, labelSet, status);
    labels_->sortWithUComparator(sortCollateComparator, collatorPrimaryOnly_, status);

    // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
    //    Implemented by copying the elements to be retained to a new UVector.

    const int32_t size = labelSet.size() - 1;
    if (size > maxLabelCount_) {
        UVector *newLabels = new UVector(status);
        if (newLabels == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        if (U_FAILURE(status)) {
            // Keep the existing labels_ rather than swapping in a vector that failed to construct.
            delete newLabels;
            return;
        }
        newLabels->setDeleter(uprv_deleteUObject);
        int32_t count = 0;
        int32_t old = -1;
        for (int32_t srcIndex=0; srcIndex<labels_->size(); srcIndex++) {
            const UnicodeString *str = static_cast<const UnicodeString *>(labels_->elementAt(srcIndex));
            ++count;
            // A label is retained only when this scaled position moves to a new integer value.
            const int32_t bump = count * maxLabelCount_ / size;
            if (bump == old) {
                // Dropped label (Java equivalent: it.remove()).
            } else {
                newLabels->addElement(str->clone(), status);
                old = bump;
            }
        }
        delete labels_;
        labels_ = newLabels;
    }

    // We now know the list of labels.
    // Create a corresponding list of buckets, one per label.

    buildBucketList(status);    // Corresponds to Java BucketList constructor.

    // Bin the Records into the Buckets.
    bucketRecords(status);

    indexBuildRequired_ = FALSE;
    resetBucketIterator(status);
}