//----------------------------------------------------------------------------- // // sortedAdd Add a value to a vector of sorted values (ints). // Do not replicate entries; if the value is already there, do not // add a second one. // Lazily create the vector if it does not already exist. // //----------------------------------------------------------------------------- void RBBITableBuilder::sortedAdd(UVector ** vector, int32_t val) { int32_t i; if (*vector == NULL) { *vector = new UVector(*fStatus); } if (*vector == NULL || U_FAILURE(*fStatus)) { return; } UVector * vec = *vector; int32_t vSize = vec->size(); for (i = 0; i < vSize; i++) { int32_t valAtI = vec->elementAti(i); if (valAtI == val) { // The value is already in the vector. Don't add it again. return; } if (valAtI > val) { break; } } vec->insertElementAt(val, i, *fStatus); }
/** * Register a source-target/variant in the specDAG. Variant may be * empty, but source and target must not be. If variant is empty then * the special variant NO_VARIANT is stored in slot zero of the * UVector of variants. */ void TransliteratorRegistry::registerSTV(const UnicodeString& source, const UnicodeString& target, const UnicodeString& variant) { // assert(source.length() > 0); // assert(target.length() > 0); UErrorCode status = U_ZERO_ERROR; Hashtable *targets = (Hashtable*) specDAG.get(source); if (targets == 0) { targets = new Hashtable(TRUE, status); if (U_FAILURE(status) || targets == 0) { return; } targets->setValueDeleter(uprv_deleteUObject); specDAG.put(source, targets, status); } UVector *variants = (UVector*) targets->get(target); if (variants == 0) { variants = new UVector(uprv_deleteUObject, uhash_compareCaselessUnicodeString, status); if (variants == 0) { return; } targets->put(target, variants, status); } // assert(NO_VARIANT == ""); // We add the variant string. If it is the special "no variant" // string, that is, the empty string, we add it at position zero. if (!variants->contains((void*) &variant)) { UnicodeString *tempus; // Used for null pointer check. if (variant.length() > 0) { tempus = new UnicodeString(variant); if (tempus != NULL) { variants->addElement(tempus, status); } } else { tempus = new UnicodeString(); // = NO_VARIANT if (tempus != NULL) { variants->insertElementAt(tempus, 0, status); } } } }
U_CDECL_END

/**
 * Parse a compound ID.  A compound ID consists of an optional forward
 * global filter, a separator, one or more single IDs delimited by
 * separators, and an optional reverse global filter.  The separator
 * is a semicolon.  The global filters are UnicodeSet patterns.  The
 * reverse global filter must be enclosed in parentheses.
 *
 * @param id the pattern to parse
 * @param dir the direction.
 * @param canonID OUTPUT parameter that receives the canonical ID,
 * consisting of canonical IDs for all elements, as returned by
 * parseSingleID(), separated by semicolons.  Previous contents
 * are discarded.
 * @param list OUTPUT parameter that receives a list of SingleID
 * objects representing the parsed IDs.  Previous contents are
 * discarded.  In the REVERSE direction each parsed ID is inserted at
 * the front, so the list ends up in reverse order of appearance.
 * @param globalFilter OUTPUT parameter that receives a pointer to
 * a newly created global filter for this ID in this direction, or
 * NULL if there is none.
 * @return TRUE if the parse succeeds, that is, if the entire
 * id is consumed without syntax error.  On failure the list is
 * emptied (its SingleID objects are deleted) and globalFilter is
 * reset to NULL.
 */
UBool TransliteratorIDParser::parseCompoundID(const UnicodeString& id, int32_t dir,
                                              UnicodeString& canonID,
                                              UVector& list,
                                              UnicodeSet*& globalFilter) {
    UErrorCode ec = U_ZERO_ERROR;
    int32_t pos = 0;
    int32_t withParens = 0;   // leading filter: parens disallowed
    UBool sawDelimiter = TRUE;
    UBool succeeded = TRUE;

    // Discard whatever the output parameters held before.
    list.removeAllElements();
    globalFilter = NULL;
    canonID.truncate(0);

    // Parse leading global filter, if any
    UnicodeSet* leading = parseGlobalFilter(id, pos, dir, withParens, &canonID);
    if (leading != NULL) {
        if (!ICU_Utility::parseChar(id, pos, ID_DELIM)) {
            // Not a global filter; backup and resume
            canonID.truncate(0);
            pos = 0;
        }
        if (dir != FORWARD) {
            delete leading;
        } else {
            globalFilter = leading;
        }
    }

    // Parse the delimiter-separated run of single IDs.
    for (;;) {
        SingleID* parsed = parseSingleID(id, pos, dir, ec);
        if (parsed == NULL) {
            break;
        }
        if (dir != FORWARD) {
            list.insertElementAt(parsed, 0, ec);
        } else {
            list.addElement(parsed, ec);
        }
        if (U_FAILURE(ec)) {
            succeeded = FALSE;
            break;
        }
        if (!ICU_Utility::parseChar(id, pos, ID_DELIM)) {
            sawDelimiter = FALSE;
            break;
        }
    }

    // At least one single ID is required.
    if (succeeded && list.size() == 0) {
        succeeded = FALSE;
    }

    if (succeeded) {
        // Construct canonical ID
        const int32_t last = list.size() - 1;
        for (int32_t k = 0; k <= last; ++k) {
            SingleID* entry = (SingleID*) list.elementAt(k);
            canonID.append(entry->canonID);
            if (k != last) {
                canonID.append(ID_DELIM);
            }
        }

        // Parse trailing global filter, if any, and only if we saw
        // a trailing delimiter after the IDs.
        if (sawDelimiter) {
            withParens = 1;   // parens required
            UnicodeSet* trailing = parseGlobalFilter(id, pos, dir, withParens, &canonID);
            if (trailing != NULL) {
                // Don't require trailing ';', but parse it if present
                ICU_Utility::parseChar(id, pos, ID_DELIM);

                if (dir != REVERSE) {
                    delete trailing;
                } else {
                    globalFilter = trailing;
                }
            }
        }

        // Trailing unparsed text is a syntax error
        ICU_Utility::skipWhitespace(id, pos, TRUE);
        if (pos != id.length()) {
            succeeded = FALSE;
        }
    }

    if (succeeded) {
        return TRUE;
    }

    // Failure: temporarily install a deleter so that emptying the list
    // also frees the SingleID objects collected so far, then restore the
    // caller's deleter and drop any filter that was produced.
    UObjectDeleter* save = list.setDeleter(_deleteSingleID);
    list.removeAllElements();
    list.setDeleter(save);
    delete globalFilter;
    globalFilter = NULL;
    return FALSE;
}
void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const { const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode); if (U_FAILURE(errorCode)) { return; } const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0); const UnicodeString &overflowBoundary = *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1); // We make a sorted array of elements. // Some of the input may be redundant. // That is, we might have c, ch, d, where "ch" sorts just like "c", "h". // We filter out those cases. UnicodeSetIterator iter(*initialLabels_); while (iter.next()) { const UnicodeString *item = &iter.getString(); LocalPointer<UnicodeString> ownedItem; UBool checkDistinct; int32_t itemLength = item->length(); if (!item->hasMoreChar32Than(0, itemLength, 1)) { checkDistinct = FALSE; } else if(item->charAt(itemLength - 1) == 0x2a && // '*' item->charAt(itemLength - 2) != 0x2a) { // Use a label if it is marked with one trailing star, // even if the label string sorts the same when all contractions are suppressed. ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1)); item = ownedItem.getAlias(); if (item == NULL) { errorCode = U_MEMORY_ALLOCATION_ERROR; return; } checkDistinct = FALSE; } else { checkDistinct = TRUE; } if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) { // Ignore a primary-ignorable or non-alphabetic index character. } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) { // Ignore an index character that will land in the overflow bucket. } else if (checkDistinct && collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) { // Ignore a multi-code point index character that does not sort distinctly // from the sequence of its separate characters. 
} else { int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_); if (insertionPoint < 0) { indexCharacters.insertElementAt( ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode); } else { const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint); if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) { indexCharacters.setElementAt( ownedString(*item, ownedItem, errorCode), insertionPoint); } } } } if (U_FAILURE(errorCode)) { return; } // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element int32_t size = indexCharacters.size() - 1; if (size > maxLabelCount_) { int32_t count = 0; int32_t old = -1; for (int32_t i = 0; i < indexCharacters.size();) { ++count; int32_t bump = count * maxLabelCount_ / size; if (bump == old) { indexCharacters.removeElementAt(i); } else { old = bump; ++i; } } } }