int32_t TimeZoneGenericNameMatchInfo::size() const { if (fMatches == NULL) { return 0; } return fMatches->size(); }
void updateVisibleIDs(Hashtable& result, UErrorCode& status) const { if (U_SUCCESS(_status)) { for (int32_t i = 0; i < _ids.size(); ++i) { result.put(*(UnicodeString*)_ids[i], (void*)this, status); } } }
UnicodeString& U_EXPORT2 ZoneMeta::getZoneIdByMetazone(const UnicodeString &mzid, const UnicodeString ®ion, UnicodeString &result) { initializeMetaToOlson(); UBool isSet = FALSE; if (gMetaToOlson != NULL) { UErrorCode status = U_ZERO_ERROR; UChar mzidUChars[ZID_KEY_MAX]; mzid.extract(mzidUChars, ZID_KEY_MAX, status); if (U_SUCCESS(status) && status!=U_STRING_NOT_TERMINATED_WARNING) { UVector *mappings = (UVector*)uhash_get(gMetaToOlson, mzidUChars); if (mappings != NULL) { // Find a preferred time zone for the given region. for (int32_t i = 0; i < mappings->size(); i++) { MetaToOlsonMappingEntry *olsonmap = (MetaToOlsonMappingEntry*)mappings->elementAt(i); if (region.compare(olsonmap->territory, -1) == 0) { result.setTo(olsonmap->id); isSet = TRUE; break; } else if (u_strcmp(olsonmap->territory, gWorld) == 0) { result.setTo(olsonmap->id); isSet = TRUE; } } } } } if (!isSet) { result.remove(); } return result; }
void CompactData::getUniquePatterns(UVector &output, UErrorCode &status) const { U_ASSERT(output.isEmpty()); // NOTE: In C++, this is done more manually with a UVector. // In Java, we can take advantage of JDK HashSet. for (auto pattern : patterns) { if (pattern == nullptr || pattern == USE_FALLBACK) { continue; } // Insert pattern into the UVector if the UVector does not already contain the pattern. // Search the UVector from the end since identical patterns are likely to be adjacent. for (int32_t i = output.size() - 1; i >= 0; i--) { if (u_strcmp(pattern, static_cast<const UChar *>(output[i])) == 0) { goto continue_outer; } } // The string was not found; add it to the UVector. // ANDY: This requires a const_cast. Why? output.addElement(const_cast<UChar *>(pattern), status); continue_outer: continue; } }
//----------------------------------------------------------------------------- // // sortedAdd Add a value to a vector of sorted values (ints). // Do not replicate entries; if the value is already there, do not // add a second one. // Lazily create the vector if it does not already exist. // //----------------------------------------------------------------------------- void RBBITableBuilder::sortedAdd(UVector ** vector, int32_t val) { int32_t i; if (*vector == NULL) { *vector = new UVector(*fStatus); } if (*vector == NULL || U_FAILURE(*fStatus)) { return; } UVector * vec = *vector; int32_t vSize = vec->size(); for (i = 0; i < vSize; i++) { int32_t valAtI = vec->elementAti(i); if (valAtI == val) { // The value is already in the vector. Don't add it again. return; } if (valAtI > val) { break; } } vec->insertElementAt(val, i, *fStatus); }
UVector* RuleBasedTimeZone::copyRules(UVector* source) { if (source == NULL) { return NULL; } UErrorCode ec = U_ZERO_ERROR; int32_t size = source->size(); UVector *rules = new UVector(size, ec); if (U_FAILURE(ec)) { return NULL; } int32_t i; for (i = 0; i < size; i++) { rules->addElement(((TimeZoneRule*)source->elementAt(i))->clone(), ec); if (U_FAILURE(ec)) { break; } } if (U_FAILURE(ec)) { // In case of error, clean up for (i = 0; i < rules->size(); i++) { TimeZoneRule *rule = (TimeZoneRule*)rules->orphanElementAt(i); delete rule; } delete rules; return NULL; } return rules; }
UBool UVector::containsNone(const UVector& other) const { for (int32_t i=0; i<other.size(); ++i) { if (indexOf(other.elements[i]) >= 0) { return FALSE; } } return TRUE; }
int32_t TransliteratorRegistry::countAvailableVariants(const UnicodeString& source, const UnicodeString& target) const { Hashtable *targets = (Hashtable*) specDAG.get(source); if (targets == 0) { return 0; } UVector *variants = (UVector*) targets->get(target); // variants may be 0 if the source/target are invalid return (variants == 0) ? 0 : variants->size(); }
UBool UVector::removeAll(const UVector& other) { UBool changed = FALSE; for (int32_t i=0; i<other.size(); ++i) { int32_t j = indexOf(other.elements[i]); if (j >= 0) { removeElementAt(j); changed = TRUE; } } return changed; }
uint32_t UXML2Document::getElement(UVector<UString>& velement, const char* tag, uint32_t tag_len) { U_TRACE(0, "UXML2Document::getElement(%p,%.*S,%u)", &velement, tag_len, tag, tag_len) U_INTERNAL_ASSERT_POINTER(tag) UString element; uint32_t n = velement.size(), pos = 0; while (true) { pos = getElement(element, pos, tag, tag_len); if (pos == U_NOT_FOUND) break; velement.push(element); } uint32_t result = velement.size() - n; U_RETURN(result); }
/** * Finish constructing a transliterator: only to be called by * constructors. Before calling init(), set trans and filter to NULL. * @param list a vector of transliterator objects to be adopted. It * should NOT be empty. The list should be in declared order. That * is, it should be in the FORWARD order; if direction is REVERSE then * the list order will be reversed. * @param direction either FORWARD or REVERSE * @param fixReverseID if TRUE, then reconstruct the ID of reverse * entries by calling getID() of component entries. Some constructors * do not require this because they apply a facade ID anyway. * @param status the error code indicating success or failure */ void CompoundTransliterator::init(UVector& list, UTransDirection direction, UBool fixReverseID, UErrorCode& status) { // assert(trans == 0); // Allocate array if (U_SUCCESS(status)) { count = list.size(); trans = (Transliterator **)uprv_malloc(count * sizeof(Transliterator *)); /* test for NULL */ if (trans == 0) { status = U_MEMORY_ALLOCATION_ERROR; return; } } if (U_FAILURE(status) || trans == 0) { // assert(trans == 0); return; } // Move the transliterators from the vector into an array. // Reverse the order if necessary. int32_t i; for (i=0; i<count; ++i) { int32_t j = (direction == UTRANS_FORWARD) ? i : count - 1 - i; trans[i] = (Transliterator*) list.elementAt(j); } // Fix compoundRBTIndex for REVERSE transliterators if (compoundRBTIndex >= 0 && direction == UTRANS_REVERSE) { compoundRBTIndex = count - 1 - compoundRBTIndex; } // If the direction is UTRANS_REVERSE then we may need to fix the // ID. if (direction == UTRANS_REVERSE && fixReverseID) { UnicodeString newID; for (i=0; i<count; ++i) { if (i > 0) { newID.append(ID_DELIM); } newID.append(trans[i]->getID()); } setID(newID); } computeMaximumContextLength(); }
//----------------------------------------------------------------------------- // // bofFixup. Fixup for state tables that include {bof} beginning of input testing. // Do an swizzle similar to chaining, modifying the followPos set of // the bofNode to include the followPos nodes from other {bot} nodes // scattered through the tree. // // This function has much in common with calcChainedFollowPos(). // //----------------------------------------------------------------------------- void RBBITableBuilder::bofFixup() { if (U_FAILURE(*fStatus)) { return; } // The parse tree looks like this ... // fTree root ---> <cat> // / \ . // <cat> <#end node> // / \ . // <bofNode> rest // of tree // // We will be adding things to the followPos set of the <bofNode> // RBBINode * bofNode = fTree->fLeftChild->fLeftChild; U_ASSERT(bofNode->fType == RBBINode::leafChar); U_ASSERT(bofNode->fVal == 2); // Get all nodes that can be the start a match of the user-written rules // (excluding the fake bofNode) // We want the nodes that can start a match in the // part labeled "rest of tree" // UVector * matchStartNodes = fTree->fLeftChild->fRightChild->fFirstPosSet; RBBINode * startNode; int startNodeIx; for (startNodeIx = 0; startNodeIx < matchStartNodes->size(); startNodeIx++) { startNode = (RBBINode *)matchStartNodes->elementAt(startNodeIx); if (startNode->fType != RBBINode::leafChar) { continue; } if (startNode->fVal == bofNode->fVal) { // We found a leaf node corresponding to a {bof} that was // explicitly written into a rule. // Add everything from the followPos set of this node to the // followPos set of the fake bofNode at the start of the tree. // setAdd(bofNode->fFollowPos, startNode->fFollowPos); } } }
//----------------------------------------------------------------------------- // // calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9 // //----------------------------------------------------------------------------- void RBBITableBuilder::calcFollowPos(RBBINode * n) { if (n == NULL || n->fType == RBBINode::leafChar || n->fType == RBBINode::endMark) { return; } calcFollowPos(n->fLeftChild); calcFollowPos(n->fRightChild); // Aho rule #1 if (n->fType == RBBINode::opCat) { RBBINode * i; // is 'i' in Aho's description uint32_t ix; UVector * LastPosOfLeftChild = n->fLeftChild->fLastPosSet; for (ix = 0; ix < (uint32_t)LastPosOfLeftChild->size(); ix++) { i = (RBBINode *)LastPosOfLeftChild->elementAt(ix); setAdd(i->fFollowPos, n->fRightChild->fFirstPosSet); } } // Aho rule #2 if (n->fType == RBBINode::opStar || n->fType == RBBINode::opPlus) { RBBINode * i; // again, n and i are the names from Aho's description. uint32_t ix; for (ix = 0; ix < (uint32_t)n->fLastPosSet->size(); ix++) { i = (RBBINode *)n->fLastPosSet->elementAt(ix); setAdd(i->fFollowPos, n->fFirstPosSet); } } }
void RBBITableBuilder::printRuleStatusTable() { int32_t thisRecord = 0; int32_t nextRecord = 0; int i; UVector *tbl = fRB->fRuleStatusVals; RBBIDebugPrintf("index | tags \n"); RBBIDebugPrintf("-------------------\n"); while (nextRecord < tbl->size()) { thisRecord = nextRecord; nextRecord = thisRecord + tbl->elementAti(thisRecord) + 1; RBBIDebugPrintf("%4d ", thisRecord); for (i=thisRecord+1; i<nextRecord; i++) { RBBIDebugPrintf(" %5d", tbl->elementAti(i)); } RBBIDebugPrintf("\n"); } RBBIDebugPrintf("\n\n"); }
/** * Remove a source-target/variant from the specDAG. */ void TransliteratorRegistry::removeSTV(const UnicodeString& source, const UnicodeString& target, const UnicodeString& variant) { // assert(source.length() > 0); // assert(target.length() > 0); // UErrorCode status = U_ZERO_ERROR; Hashtable *targets = (Hashtable*) specDAG.get(source); if (targets == 0) { return; // should never happen for valid s-t/v } UVector *variants = (UVector*) targets->get(target); if (variants == 0) { return; // should never happen for valid s-t/v } variants->removeElement((void*) &variant); if (variants->size() == 0) { targets->remove(target); // should delete variants if (targets->count() == 0) { specDAG.remove(source); // should delete targets } } }
//----------------------------------------------------------------------------- // // mergeRuleStatusVals // // Update the global table of rule status {tag} values // The rule builder has a global vector of status values that are common // for all tables. Merge the ones from this table into the global set. // //----------------------------------------------------------------------------- void RBBITableBuilder::mergeRuleStatusVals() { // // The basic outline of what happens here is this... // // for each state in this state table // if the status tag list for this state is in the global statuses list // record where and // continue with the next state // else // add the tag list for this state to the global list. // int i; int n; // Pre-set a single tag of {0} into the table. // We will need this as a default, for rule sets with no explicit tagging. if (fRB->fRuleStatusVals->size() == 0) { fRB->fRuleStatusVals->addElement(1, *fStatus); // Num of statuses in group fRB->fRuleStatusVals->addElement((int32_t)0, *fStatus); // and our single status of zero } // For each state for (n=0; n<fDStates->size(); n++) { RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); UVector *thisStatesTagValues = sd->fTagVals; if (thisStatesTagValues == NULL) { // No tag values are explicitly associated with this state. // Set the default tag value. sd->fTagsIdx = 0; continue; } // There are tag(s) associated with this state. // fTagsIdx will be the index into the global tag list for this state's tag values. // Initial value of -1 flags that we haven't got it set yet. sd->fTagsIdx = -1; int32_t thisTagGroupStart = 0; // indexes into the global rule status vals list int32_t nextTagGroupStart = 0; // Loop runs once per group of tags in the global list while (nextTagGroupStart < fRB->fRuleStatusVals->size()) { thisTagGroupStart = nextTagGroupStart; nextTagGroupStart += fRB->fRuleStatusVals->elementAti(thisTagGroupStart) + 1; if (thisStatesTagValues->size() != fRB->fRuleStatusVals->elementAti(thisTagGroupStart)) { // The number of tags for this state is different from // the number of tags in this group from the global list. // Continue with the next group from the global list. continue; } // The lengths match, go ahead and compare the actual tag values // between this state and the group from the global list. for (i=0; i<thisStatesTagValues->size(); i++) { if (thisStatesTagValues->elementAti(i) != fRB->fRuleStatusVals->elementAti(thisTagGroupStart + 1 + i) ) { // Mismatch. break; } } if (i == thisStatesTagValues->size()) { // We found a set of tag values in the global list that match // those for this state. Use them. sd->fTagsIdx = thisTagGroupStart; break; } } if (sd->fTagsIdx == -1) { // No suitable entry in the global tag list already. Add one sd->fTagsIdx = fRB->fRuleStatusVals->size(); fRB->fRuleStatusVals->addElement(thisStatesTagValues->size(), *fStatus); for (i=0; i<thisStatesTagValues->size(); i++) { fRB->fRuleStatusVals->addElement(thisStatesTagValues->elementAti(i), *fStatus); } } } }
//----------------------------------------------------------------------------- // // buildStateTable() Determine the set of runtime DFA states and the // transition tables for these states, by the algorithm // of fig. 3.44 in Aho. // // Most of the comments are quotes of Aho's psuedo-code. // //----------------------------------------------------------------------------- void RBBITableBuilder::buildStateTable() { if (U_FAILURE(*fStatus)) { return; } // // Add a dummy state 0 - the stop state. Not from Aho. int lastInputSymbol = fRB->fSetBuilder->getNumCharCategories() - 1; RBBIStateDescriptor *failState = new RBBIStateDescriptor(lastInputSymbol, fStatus); failState->fPositions = new UVector(*fStatus); if (U_FAILURE(*fStatus)) { return; } fDStates->addElement(failState, *fStatus); if (U_FAILURE(*fStatus)) { return; } // initially, the only unmarked state in Dstates is firstpos(root), // where toot is the root of the syntax tree for (r)#; RBBIStateDescriptor *initialState = new RBBIStateDescriptor(lastInputSymbol, fStatus); if (U_FAILURE(*fStatus)) { return; } initialState->fPositions = new UVector(*fStatus); if (U_FAILURE(*fStatus)) { return; } setAdd(initialState->fPositions, fTree->fFirstPosSet); fDStates->addElement(initialState, *fStatus); if (U_FAILURE(*fStatus)) { return; } // while there is an unmarked state T in Dstates do begin for (;;) { RBBIStateDescriptor *T = NULL; int32_t tx; for (tx=1; tx<fDStates->size(); tx++) { RBBIStateDescriptor *temp; temp = (RBBIStateDescriptor *)fDStates->elementAt(tx); if (temp->fMarked == FALSE) { T = temp; break; } } if (T == NULL) { break; } // mark T; T->fMarked = TRUE; // for each input symbol a do begin int32_t a; for (a = 1; a<=lastInputSymbol; a++) { // let U be the set of positions that are in followpos(p) // for some position p in T // such that the symbol at position p is a; UVector *U = NULL; RBBINode *p; int32_t px; for (px=0; px<T->fPositions->size(); px++) { p = (RBBINode *)T->fPositions->elementAt(px); if ((p->fType == RBBINode::leafChar) && (p->fVal == a)) { if (U == NULL) { U = new UVector(*fStatus); } setAdd(U, p->fFollowPos); } } // if U is not empty and not in DStates then int32_t ux = 0; UBool UinDstates = FALSE; if (U != NULL) { U_ASSERT(U->size() > 0); int ix; for (ix=0; ix<fDStates->size(); ix++) { RBBIStateDescriptor *temp2; temp2 = (RBBIStateDescriptor *)fDStates->elementAt(ix); if (setEquals(U, temp2->fPositions)) { delete U; U = temp2->fPositions; ux = ix; UinDstates = TRUE; break; } } // Add U as an unmarked state to Dstates if (!UinDstates) { RBBIStateDescriptor *newState = new RBBIStateDescriptor(lastInputSymbol, fStatus); if (U_FAILURE(*fStatus)) { return; } newState->fPositions = U; fDStates->addElement(newState, *fStatus); if (U_FAILURE(*fStatus)) { return; } ux = fDStates->size()-1; } // Dtran[T, a] := U; T->fDtran->setElementAt(ux, a); } } } }
//----------------------------------------------------------------------------- // // calcChainedFollowPos. Modify the previously calculated followPos sets // to implement rule chaining. NOT described by Aho // //----------------------------------------------------------------------------- void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree) { UVector endMarkerNodes(*fStatus); UVector leafNodes(*fStatus); int32_t i; if (U_FAILURE(*fStatus)) { return; } // get a list of all endmarker nodes. tree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus); // get a list all leaf nodes tree->findNodes(&leafNodes, RBBINode::leafChar, *fStatus); if (U_FAILURE(*fStatus)) { return; } // Get all nodes that can be the start a match, which is FirstPosition() // of the portion of the tree corresponding to user-written rules. // See the tree description in bofFixup(). RBBINode *userRuleRoot = tree; if (fRB->fSetBuilder->sawBOF()) { userRuleRoot = tree->fLeftChild->fRightChild; } U_ASSERT(userRuleRoot != NULL); UVector *matchStartNodes = userRuleRoot->fFirstPosSet; // Iteratate over all leaf nodes, // int32_t endNodeIx; int32_t startNodeIx; for (endNodeIx=0; endNodeIx<leafNodes.size(); endNodeIx++) { RBBINode *tNode = (RBBINode *)leafNodes.elementAt(endNodeIx); RBBINode *endNode = NULL; // Identify leaf nodes that correspond to overall rule match positions. // These include an endMarkerNode in their followPos sets. for (i=0; i<endMarkerNodes.size(); i++) { if (tNode->fFollowPos->contains(endMarkerNodes.elementAt(i))) { endNode = tNode; break; } } if (endNode == NULL) { // node wasn't an end node. Try again with the next. continue; } // We've got a node that can end a match. // Line Break Specific hack: If this node's val correspond to the $CM char class, // don't chain from it. // TODO: Add rule syntax for this behavior, get specifics out of here and // into the rule file. if (fRB->fLBCMNoChain) { UChar32 c = this->fRB->fSetBuilder->getFirstChar(endNode->fVal); if (c != -1) { // c == -1 occurs with sets containing only the {eof} marker string. ULineBreak cLBProp = (ULineBreak)u_getIntPropertyValue(c, UCHAR_LINE_BREAK); if (cLBProp == U_LB_COMBINING_MARK) { continue; } } } // Now iterate over the nodes that can start a match, looking for ones // with the same char class as our ending node. RBBINode *startNode; for (startNodeIx = 0; startNodeIx<matchStartNodes->size(); startNodeIx++) { startNode = (RBBINode *)matchStartNodes->elementAt(startNodeIx); if (startNode->fType != RBBINode::leafChar) { continue; } if (endNode->fVal == startNode->fVal) { // The end val (character class) of one possible match is the // same as the start of another. // Add all nodes from the followPos of the start node to the // followPos set of the end node, which will have the effect of // letting matches transition from a match state at endNode // to the second char of a match starting with startNode. setAdd(endNode->fFollowPos, startNode->fFollowPos); } } } }
/** * Convert the elements of the 'list' vector, which are SingleID * objects, into actual Transliterator objects. In the course of * this, some (or all) entries may be removed. If all entries * are removed, the NULL transliterator will be added. * * Delete entries with empty basicIDs; these are generated by * elements like "(A)" in the forward direction, or "A()" in * the reverse. THIS MAY RESULT IN AN EMPTY VECTOR. Convert * SingleID entries to actual transliterators. * * @param list vector of SingleID objects. On exit, vector * of one or more Transliterators. * @return new value of insertIndex. The index will shift if * there are empty items, like "(Lower)", with indices less than * insertIndex. */ void TransliteratorIDParser::instantiateList(UVector & list, UErrorCode & ec) { UVector tlist(ec); if (U_FAILURE(ec)) { goto RETURN; } tlist.setDeleter(_deleteTransliteratorTrIDPars); Transliterator * t; int32_t i; for (i = 0; i <= list.size(); ++i) // [sic]: i<=list.size() { // We run the loop too long by one, so we can // do an insert after the last element if (i == list.size()) { break; } SingleID * single = (SingleID *) list.elementAt(i); if (single->basicID.length() != 0) { t = single->createInstance(); if (t == NULL) { ec = U_INVALID_ID; goto RETURN; } tlist.addElement(t, ec); if (U_FAILURE(ec)) { delete t; goto RETURN; } } } // An empty list is equivalent to a NULL transliterator. if (tlist.size() == 0) { t = createBasicInstance(ANY_NULL, NULL); if (t == NULL) { // Should never happen ec = U_INTERNAL_TRANSLITERATOR_ERROR; } tlist.addElement(t, ec); if (U_FAILURE(ec)) { delete t; } } RETURN: UObjectDeleter * save = list.setDeleter(_deleteSingleID); list.removeAllElements(); if (U_SUCCESS(ec)) { list.setDeleter(_deleteTransliteratorTrIDPars); while (tlist.size() > 0) { t = (Transliterator *) tlist.orphanElementAt(0); list.addElement(t, ec); if (U_FAILURE(ec)) { delete t; list.removeAllElements(); break; } } } list.setDeleter(save); }
U_CDECL_END /** * Parse a compound ID, consisting of an optional forward global * filter, a separator, one or more single IDs delimited by * separators, an an optional reverse global filter. The * separator is a semicolon. The global filters are UnicodeSet * patterns. The reverse global filter must be enclosed in * parentheses. * @param id the pattern the parse * @param dir the direction. * @param canonID OUTPUT parameter that receives the canonical ID, * consisting of canonical IDs for all elements, as returned by * parseSingleID(), separated by semicolons. Previous contents * are discarded. * @param list OUTPUT parameter that receives a list of SingleID * objects representing the parsed IDs. Previous contents are * discarded. * @param globalFilter OUTPUT parameter that receives a pointer to * a newly created global filter for this ID in this direction, or * NULL if there is none. * @return TRUE if the parse succeeds, that is, if the entire * id is consumed without syntax error. */ UBool TransliteratorIDParser::parseCompoundID(const UnicodeString & id, int32_t dir, UnicodeString & canonID, UVector & list, UnicodeSet *& globalFilter) { UErrorCode ec = U_ZERO_ERROR; int32_t i; int32_t pos = 0; int32_t withParens = 1; list.removeAllElements(); UnicodeSet * filter; globalFilter = NULL; canonID.truncate(0); // Parse leading global filter, if any withParens = 0; // parens disallowed filter = parseGlobalFilter(id, pos, dir, withParens, &canonID); if (filter != NULL) { if (!ICU_Utility::parseChar(id, pos, ID_DELIM)) { // Not a global filter; backup and resume canonID.truncate(0); pos = 0; } if (dir == FORWARD) { globalFilter = filter; } else { delete filter; } filter = NULL; } UBool sawDelimiter = TRUE; for (;;) { SingleID * single = parseSingleID(id, pos, dir, ec); if (single == NULL) { break; } if (dir == FORWARD) { list.addElement(single, ec); } else { list.insertElementAt(single, 0, ec); } if (U_FAILURE(ec)) { goto FAIL; } if (!ICU_Utility::parseChar(id, pos, ID_DELIM)) { sawDelimiter = FALSE; break; } } if (list.size() == 0) { goto FAIL; } // Construct canonical ID for (i = 0; i < list.size(); ++i) { SingleID * single = (SingleID *) list.elementAt(i); canonID.append(single->canonID); if (i != (list.size() - 1)) { canonID.append(ID_DELIM); } } // Parse trailing global filter, if any, and only if we saw // a trailing delimiter after the IDs. if (sawDelimiter) { withParens = 1; // parens required filter = parseGlobalFilter(id, pos, dir, withParens, &canonID); if (filter != NULL) { // Don't require trailing ';', but parse it if present ICU_Utility::parseChar(id, pos, ID_DELIM); if (dir == REVERSE) { globalFilter = filter; } else { delete filter; } filter = NULL; } } // Trailing unparsed text is a syntax error ICU_Utility::skipWhitespace(id, pos, TRUE); if (pos != id.length()) { goto FAIL; } return TRUE; FAIL: UObjectDeleter * save = list.setDeleter(_deleteSingleID); list.removeAllElements(); list.setDeleter(save); delete globalFilter; globalFilter = NULL; return FALSE; }
virtual int32_t count(UErrorCode& status) const { return upToDate(status) ? _ids.size() : 0; }
void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const { const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode); if (U_FAILURE(errorCode)) { return; } const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0); const UnicodeString &overflowBoundary = *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1); // We make a sorted array of elements. // Some of the input may be redundant. // That is, we might have c, ch, d, where "ch" sorts just like "c", "h". // We filter out those cases. UnicodeSetIterator iter(*initialLabels_); while (iter.next()) { const UnicodeString *item = &iter.getString(); LocalPointer<UnicodeString> ownedItem; UBool checkDistinct; int32_t itemLength = item->length(); if (!item->hasMoreChar32Than(0, itemLength, 1)) { checkDistinct = FALSE; } else if(item->charAt(itemLength - 1) == 0x2a && // '*' item->charAt(itemLength - 2) != 0x2a) { // Use a label if it is marked with one trailing star, // even if the label string sorts the same when all contractions are suppressed. ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1)); item = ownedItem.getAlias(); if (item == NULL) { errorCode = U_MEMORY_ALLOCATION_ERROR; return; } checkDistinct = FALSE; } else { checkDistinct = TRUE; } if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) { // Ignore a primary-ignorable or non-alphabetic index character. } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) { // Ignore an index character that will land in the overflow bucket. } else if (checkDistinct && collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) { // Ignore a multi-code point index character that does not sort distinctly // from the sequence of its separate characters. } else { int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_); if (insertionPoint < 0) { indexCharacters.insertElementAt( ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode); } else { const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint); if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) { indexCharacters.setElementAt( ownedString(*item, ownedItem, errorCode), insertionPoint); } } } } if (U_FAILURE(errorCode)) { return; } // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element int32_t size = indexCharacters.size() - 1; if (size > maxLabelCount_) { int32_t count = 0; int32_t old = -1; for (int32_t i = 0; i < indexCharacters.size();) { ++count; int32_t bump = count * maxLabelCount_ / size; if (bump == old) { indexCharacters.removeElementAt(i); } else { old = bump; ++i; } } } }
// Build the Whole Script Confusable data // // TODO: Reorganize. Either get rid of the WSConfusableDataBuilder class, // because everything is local to this one build function anyhow, // OR // break this function into more reasonably sized pieces, with // state in WSConfusableDataBuilder. // void buildWSConfusableData(SpoofImpl *spImpl, const char * confusablesWS, int32_t confusablesWSLen, UParseError *pe, UErrorCode &status) { if (U_FAILURE(status)) { return; } URegularExpression *parseRegexp = NULL; int32_t inputLen = 0; UChar *input = NULL; int32_t lineNum = 0; UVector *scriptSets = NULL; uint32_t rtScriptSetsCount = 2; UTrie2 *anyCaseTrie = NULL; UTrie2 *lowerCaseTrie = NULL; anyCaseTrie = utrie2_open(0, 0, &status); lowerCaseTrie = utrie2_open(0, 0, &status); // The scriptSets vector provides a mapping from TRIE values to the set of scripts. // // Reserved TRIE values: // 0: Code point has no whole script confusables. // 1: Code point is of script Common or Inherited. // These code points do not participate in whole script confusable detection. // (This is logically equivalent to saying that they contain confusables in // all scripts) // // Because Trie values are indexes into the ScriptSets vector, pre-fill // vector positions 0 and 1 to avoid conflicts with the reserved values. scriptSets = new UVector(status); if (scriptSets == NULL) { status = U_MEMORY_ALLOCATION_ERROR; goto cleanup; } scriptSets->addElement((void *)NULL, status); scriptSets->addElement((void *)NULL, status); // Convert the user input data from UTF-8 to UChar (UTF-16) u_strFromUTF8(NULL, 0, &inputLen, confusablesWS, confusablesWSLen, &status); if (status != U_BUFFER_OVERFLOW_ERROR) { goto cleanup; } status = U_ZERO_ERROR; input = static_cast<UChar *>(uprv_malloc((inputLen+1) * sizeof(UChar))); if (input == NULL) { status = U_MEMORY_ALLOCATION_ERROR; goto cleanup; } u_strFromUTF8(input, inputLen+1, NULL, confusablesWS, confusablesWSLen, &status); parseRegexp = uregex_openC(parseExp, 0, NULL, &status); // Zap any Byte Order Mark at the start of input. Changing it to a space is benign // given the syntax of the input. if (*input == 0xfeff) { *input = 0x20; } // Parse the input, one line per iteration of this loop. uregex_setText(parseRegexp, input, inputLen, &status); while (uregex_findNext(parseRegexp, &status)) { lineNum++; UChar line[200]; uregex_group(parseRegexp, 0, line, 200, &status); if (uregex_start(parseRegexp, 1, &status) >= 0) { // this was a blank or comment line. continue; } if (uregex_start(parseRegexp, 8, &status) >= 0) { // input file syntax error. status = U_PARSE_ERROR; goto cleanup; } if (U_FAILURE(status)) { goto cleanup; } // Pick up the start and optional range end code points from the parsed line. UChar32 startCodePoint = SpoofImpl::ScanHex( input, uregex_start(parseRegexp, 2, &status), uregex_end(parseRegexp, 2, &status), status); UChar32 endCodePoint = startCodePoint; if (uregex_start(parseRegexp, 3, &status) >=0) { endCodePoint = SpoofImpl::ScanHex( input, uregex_start(parseRegexp, 3, &status), uregex_end(parseRegexp, 3, &status), status); } // Extract the two script names from the source line. We need these in an 8 bit // default encoding (will be EBCDIC on IBM mainframes) in order to pass them on // to the ICU u_getPropertyValueEnum() function. Ugh. char srcScriptName[20]; char targScriptName[20]; extractGroup(parseRegexp, 4, srcScriptName, sizeof(srcScriptName), status); extractGroup(parseRegexp, 5, targScriptName, sizeof(targScriptName), status); UScriptCode srcScript = static_cast<UScriptCode>(u_getPropertyValueEnum(UCHAR_SCRIPT, srcScriptName)); UScriptCode targScript = static_cast<UScriptCode>(u_getPropertyValueEnum(UCHAR_SCRIPT, targScriptName)); if (U_FAILURE(status)) { goto cleanup; } if (srcScript == USCRIPT_INVALID_CODE || targScript == USCRIPT_INVALID_CODE) { status = U_INVALID_FORMAT_ERROR; goto cleanup; } // select the table - (A) any case or (L) lower case only UTrie2 *table = anyCaseTrie; if (uregex_start(parseRegexp, 7, &status) >= 0) { table = lowerCaseTrie; } // Build the set of scripts containing confusable characters for // the code point(s) specified in this input line. // Sanity check that the script of the source code point is the same // as the source script indicated in the input file. Failure of this check is // an error in the input file. // Include the source script in the set (needed for Mixed Script Confusable detection). // UChar32 cp; for (cp=startCodePoint; cp<=endCodePoint; cp++) { int32_t setIndex = utrie2_get32(table, cp); BuilderScriptSet *bsset = NULL; if (setIndex > 0) { U_ASSERT(setIndex < scriptSets->size()); bsset = static_cast<BuilderScriptSet *>(scriptSets->elementAt(setIndex)); } else { bsset = new BuilderScriptSet(); if (bsset == NULL) { status = U_MEMORY_ALLOCATION_ERROR; goto cleanup; } bsset->codePoint = cp; bsset->trie = table; bsset->sset = new ScriptSet(); setIndex = scriptSets->size(); bsset->index = setIndex; bsset->rindex = 0; if (bsset->sset == NULL) { status = U_MEMORY_ALLOCATION_ERROR; goto cleanup; } scriptSets->addElement(bsset, status); utrie2_set32(table, cp, setIndex, &status); } bsset->sset->Union(targScript); bsset->sset->Union(srcScript); if (U_FAILURE(status)) { goto cleanup; } UScriptCode cpScript = uscript_getScript(cp, &status); if (cpScript != srcScript) { status = U_INVALID_FORMAT_ERROR; goto cleanup; } } } // Eliminate duplicate script sets. At this point we have a separate // script set for every code point that had data in the input file. // // We eliminate underlying ScriptSet objects, not the BuildScriptSets that wrap them // // printf("Number of scriptSets: %d\n", scriptSets->size()); { int32_t duplicateCount = 0; rtScriptSetsCount = 2; for (int32_t outeri=2; outeri<scriptSets->size(); outeri++) { BuilderScriptSet *outerSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(outeri)); if (outerSet->index != static_cast<uint32_t>(outeri)) { // This set was already identified as a duplicate. // It will not be allocated a position in the runtime array of ScriptSets. continue; } outerSet->rindex = rtScriptSetsCount++; for (int32_t inneri=outeri+1; inneri<scriptSets->size(); inneri++) { BuilderScriptSet *innerSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(inneri)); if (*(outerSet->sset) == *(innerSet->sset) && outerSet->sset != innerSet->sset) { delete innerSet->sset; innerSet->scriptSetOwned = FALSE; innerSet->sset = outerSet->sset; innerSet->index = outeri; innerSet->rindex = outerSet->rindex; duplicateCount++; } // But this doesn't get all. We need to fix the TRIE. } } // printf("Number of distinct script sets: %d\n", rtScriptSetsCount); } // Update the Trie values to be reflect the run time script indexes (after duplicate merging). // (Trie Values 0 and 1 are reserved, and the corresponding slots in scriptSets // are unused, which is why the loop index starts at 2.) { for (int32_t i=2; i<scriptSets->size(); i++) { BuilderScriptSet *bSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(i)); if (bSet->rindex != (uint32_t)i) { utrie2_set32(bSet->trie, bSet->codePoint, bSet->rindex, &status); } } } // For code points with script==Common or script==Inherited, // Set the reserved value of 1 into both Tries. These characters do not participate // in Whole Script Confusable detection; this reserved value is the means // by which they are detected. { UnicodeSet ignoreSet; ignoreSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_COMMON, status); UnicodeSet inheritedSet; inheritedSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_INHERITED, status); ignoreSet.addAll(inheritedSet); for (int32_t rn=0; rn<ignoreSet.getRangeCount(); rn++) { UChar32 rangeStart = ignoreSet.getRangeStart(rn); UChar32 rangeEnd = ignoreSet.getRangeEnd(rn); utrie2_setRange32(anyCaseTrie, rangeStart, rangeEnd, 1, TRUE, &status); utrie2_setRange32(lowerCaseTrie, rangeStart, rangeEnd, 1, TRUE, &status); } } // Serialize the data to the Spoof Detector { utrie2_freeze(anyCaseTrie, UTRIE2_16_VALUE_BITS, &status); int32_t size = utrie2_serialize(anyCaseTrie, NULL, 0, &status); // printf("Any case Trie size: %d\n", size); if (status != U_BUFFER_OVERFLOW_ERROR) { goto cleanup; } status = U_ZERO_ERROR; spImpl->fSpoofData->fRawData->fAnyCaseTrie = spImpl->fSpoofData->fMemLimit; spImpl->fSpoofData->fRawData->fAnyCaseTrieLength = size; spImpl->fSpoofData->fAnyCaseTrie = anyCaseTrie; void *where = spImpl->fSpoofData->reserveSpace(size, status); utrie2_serialize(anyCaseTrie, where, size, &status); utrie2_freeze(lowerCaseTrie, UTRIE2_16_VALUE_BITS, &status); size = utrie2_serialize(lowerCaseTrie, NULL, 0, &status); // printf("Lower case Trie size: %d\n", size); if (status != U_BUFFER_OVERFLOW_ERROR) { goto cleanup; } status = U_ZERO_ERROR; spImpl->fSpoofData->fRawData->fLowerCaseTrie = spImpl->fSpoofData->fMemLimit; spImpl->fSpoofData->fRawData->fLowerCaseTrieLength = size; spImpl->fSpoofData->fLowerCaseTrie = lowerCaseTrie; where = spImpl->fSpoofData->reserveSpace(size, status); utrie2_serialize(lowerCaseTrie, where, size, &status); spImpl->fSpoofData->fRawData->fScriptSets = spImpl->fSpoofData->fMemLimit; spImpl->fSpoofData->fRawData->fScriptSetsLength = rtScriptSetsCount; ScriptSet *rtScriptSets = static_cast<ScriptSet *> (spImpl->fSpoofData->reserveSpace(rtScriptSetsCount * sizeof(ScriptSet), status)); uint32_t rindex = 2; for (int32_t i=2; i<scriptSets->size(); i++) { BuilderScriptSet *bSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(i)); if (bSet->rindex < rindex) { // We have already copied this script set to the serialized data. continue; } U_ASSERT(rindex == bSet->rindex); rtScriptSets[rindex] = *bSet->sset; // Assignment of a ScriptSet just copies the bits. rindex++; } } // Open new utrie2s from the serialized data. We don't want to keep the ones // we just built because we would then have two copies of the data, one internal to // the utries that we have already constructed, and one in the serialized data area. // An alternative would be to not pre-serialize the Trie data, but that makes the // spoof detector data different, depending on how the detector was constructed. // It's simpler to keep the data always the same. spImpl->fSpoofData->fAnyCaseTrie = utrie2_openFromSerialized( UTRIE2_16_VALUE_BITS, (const char *)spImpl->fSpoofData->fRawData + spImpl->fSpoofData->fRawData->fAnyCaseTrie, spImpl->fSpoofData->fRawData->fAnyCaseTrieLength, NULL, &status); spImpl->fSpoofData->fLowerCaseTrie = utrie2_openFromSerialized( UTRIE2_16_VALUE_BITS, (const char *)spImpl->fSpoofData->fRawData + spImpl->fSpoofData->fRawData->fLowerCaseTrie, spImpl->fSpoofData->fRawData->fAnyCaseTrieLength, NULL, &status); cleanup: if (U_FAILURE(status)) { pe->line = lineNum; } uregex_close(parseRegexp); uprv_free(input); int32_t i; for (i=0; i<scriptSets->size(); i++) { BuilderScriptSet *bsset = static_cast<BuilderScriptSet *>(scriptSets->elementAt(i)); delete bsset; } delete scriptSets; utrie2_close(anyCaseTrie); utrie2_close(lowerCaseTrie); return; }
/* * Initializes the region data from the ICU resource bundles. The region data * contains the basic relationships such as which regions are known, what the numeric * codes are, any known aliases, and the territory containment data. * * If the region data has already loaded, then this method simply returns without doing * anything meaningful. */ void Region::loadRegionData() { if (regionDataIsLoaded) { return; } umtx_lock(&gRegionDataLock); if (regionDataIsLoaded) { // In case another thread gets to it before we do... umtx_unlock(&gRegionDataLock); return; } UErrorCode status = U_ZERO_ERROR; UResourceBundle* regionCodes = NULL; UResourceBundle* territoryAlias = NULL; UResourceBundle* codeMappings = NULL; UResourceBundle* worldContainment = NULL; UResourceBundle* territoryContainment = NULL; UResourceBundle* groupingContainment = NULL; DecimalFormat *df = new DecimalFormat(status); df->setParseIntegerOnly(TRUE); regionIDMap = uhash_open(uhash_hashUnicodeString,uhash_compareUnicodeString,NULL,&status); uhash_setValueDeleter(regionIDMap, deleteRegion); numericCodeMap = uhash_open(uhash_hashLong,uhash_compareLong,NULL,&status); regionAliases = uhash_open(uhash_hashUnicodeString,uhash_compareUnicodeString,NULL,&status); uhash_setKeyDeleter(regionAliases,uprv_deleteUObject); UResourceBundle *rb = ures_openDirect(NULL,"metadata",&status); regionCodes = ures_getByKey(rb,"regionCodes",NULL,&status); territoryAlias = ures_getByKey(rb,"territoryAlias",NULL,&status); UResourceBundle *rb2 = ures_openDirect(NULL,"supplementalData",&status); codeMappings = ures_getByKey(rb2,"codeMappings",NULL,&status); territoryContainment = ures_getByKey(rb2,"territoryContainment",NULL,&status); worldContainment = ures_getByKey(territoryContainment,"001",NULL,&status); groupingContainment = ures_getByKey(territoryContainment,"grouping",NULL,&status); UVector *continents = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); while ( ures_hasNext(worldContainment) ) { UnicodeString *continentName = new UnicodeString(ures_getNextUnicodeString(worldContainment,NULL,&status)); continents->addElement(continentName,status); } UVector *groupings = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); while ( ures_hasNext(groupingContainment) ) { UnicodeString *groupingName = new UnicodeString(ures_getNextUnicodeString(groupingContainment,NULL,&status)); groupings->addElement(groupingName,status); } while ( ures_hasNext(regionCodes) ) { UnicodeString regionID = ures_getNextUnicodeString(regionCodes,NULL,&status); Region *r = new Region(); r->idStr = regionID; r->idStr.extract(0,r->idStr.length(),r->id,sizeof(r->id),US_INV); r->type = URGN_TERRITORY; // Only temporary - figure out the real type later once the aliases are known. uhash_put(regionIDMap,(void *)&(r->idStr),(void *)r,&status); Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(r->idStr,result,ps); if ( U_SUCCESS(ps) ) { r->code = result.getLong(); // Convert string to number uhash_iput(numericCodeMap,r->code,(void *)r,&status); r->type = URGN_SUBCONTINENT; } else { r->code = Region::UNDEFINED_NUMERIC_CODE; } } // Process the territory aliases while ( ures_hasNext(territoryAlias) ) { UResourceBundle *res = ures_getNextResource(territoryAlias,NULL,&status); const char *aliasFrom = ures_getKey(res); UnicodeString* aliasFromStr = new UnicodeString(aliasFrom); UnicodeString aliasTo = ures_getUnicodeString(res,&status); ures_close(res); Region *aliasToRegion = (Region *) uhash_get(regionIDMap,&aliasTo); Region *aliasFromRegion = (Region *)uhash_get(regionIDMap,aliasFromStr); if ( aliasToRegion != NULL && aliasFromRegion == NULL ) { // This is just an alias from some string to a region uhash_put(regionAliases,(void *)aliasFromStr, (void *)aliasToRegion,&status); } else { if ( aliasFromRegion == NULL ) { // Deprecated region code not in the master codes list - so need to create a deprecated region for it. aliasFromRegion = new Region(); aliasFromRegion->idStr.setTo(*aliasFromStr); aliasFromRegion->idStr.extract(0,aliasFromRegion->idStr.length(),aliasFromRegion->id,sizeof(aliasFromRegion->id),US_INV); uhash_put(regionIDMap,(void *)&(aliasFromRegion->idStr),(void *)aliasFromRegion,&status); Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(aliasFromRegion->idStr,result,ps); if ( U_SUCCESS(ps) ) { aliasFromRegion->code = result.getLong(); // Convert string to number uhash_iput(numericCodeMap,aliasFromRegion->code,(void *)aliasFromRegion,&status); } else { aliasFromRegion->code = Region::UNDEFINED_NUMERIC_CODE; } aliasFromRegion->type = URGN_DEPRECATED; } else { aliasFromRegion->type = URGN_DEPRECATED; } delete aliasFromStr; aliasFromRegion->preferredValues = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); UnicodeString currentRegion; currentRegion.remove(); for (int32_t i = 0 ; i < aliasTo.length() ; i++ ) { if ( aliasTo.charAt(i) != 0x0020 ) { currentRegion.append(aliasTo.charAt(i)); } if ( aliasTo.charAt(i) == 0x0020 || i+1 == aliasTo.length() ) { Region *target = (Region *)uhash_get(regionIDMap,(void *)¤tRegion); if (target) { UnicodeString *preferredValue = new UnicodeString(target->idStr); aliasFromRegion->preferredValues->addElement((void *)preferredValue,status); } currentRegion.remove(); } } } } // Process the code mappings - This will allow us to assign numeric codes to most of the territories. while ( ures_hasNext(codeMappings) ) { UResourceBundle *mapping = ures_getNextResource(codeMappings,NULL,&status); if ( ures_getType(mapping) == URES_ARRAY && ures_getSize(mapping) == 3) { UnicodeString codeMappingID = ures_getUnicodeStringByIndex(mapping,0,&status); UnicodeString codeMappingNumber = ures_getUnicodeStringByIndex(mapping,1,&status); UnicodeString codeMapping3Letter = ures_getUnicodeStringByIndex(mapping,2,&status); Region *r = (Region *)uhash_get(regionIDMap,(void *)&codeMappingID); if ( r ) { Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(codeMappingNumber,result,ps); if ( U_SUCCESS(ps) ) { r->code = result.getLong(); // Convert string to number uhash_iput(numericCodeMap,r->code,(void *)r,&status); } UnicodeString *code3 = new UnicodeString(codeMapping3Letter); uhash_put(regionAliases,(void *)code3, (void *)r,&status); } } ures_close(mapping); } // Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS Region *r; r = (Region *) uhash_get(regionIDMap,(void *)&WORLD_ID); if ( r ) { r->type = URGN_WORLD; } r = (Region *) uhash_get(regionIDMap,(void *)&UNKNOWN_REGION_ID); if ( r ) { r->type = URGN_UNKNOWN; } for ( int32_t i = 0 ; i < continents->size() ; i++ ) { r = (Region *) uhash_get(regionIDMap,(void *)continents->elementAt(i)); if ( r ) { r->type = URGN_CONTINENT; } } delete continents; for ( int32_t i = 0 ; i < groupings->size() ; i++ ) { r = (Region *) uhash_get(regionIDMap,(void *)groupings->elementAt(i)); if ( r ) { r->type = URGN_GROUPING; } } delete groupings; // Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR // even though it looks like a territory code. Need to handle it here. r = (Region *) uhash_get(regionIDMap,(void *)&OUTLYING_OCEANIA_REGION_ID); if ( r ) { r->type = URGN_SUBCONTINENT; } // Load territory containment info from the supplemental data. while ( ures_hasNext(territoryContainment) ) { UResourceBundle *mapping = ures_getNextResource(territoryContainment,NULL,&status); const char *parent = ures_getKey(mapping); UnicodeString parentStr = UnicodeString(parent); Region *parentRegion = (Region *) uhash_get(regionIDMap,(void *)&parentStr); for ( int j = 0 ; j < ures_getSize(mapping); j++ ) { UnicodeString child = ures_getUnicodeStringByIndex(mapping,j,&status); Region *childRegion = (Region *) uhash_get(regionIDMap,(void *)&child); if ( parentRegion != NULL && childRegion != NULL ) { // Add the child region to the set of regions contained by the parent if (parentRegion->containedRegions == NULL) { parentRegion->containedRegions = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); } UnicodeString *childStr = new UnicodeString(status); childStr->fastCopyFrom(childRegion->idStr); parentRegion->containedRegions->addElement((void *)childStr,status); // Set the parent region to be the containing region of the child. // Regions of type GROUPING can't be set as the parent, since another region // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent. if ( parentRegion->type != URGN_GROUPING) { childRegion->containingRegion = parentRegion; } } } ures_close(mapping); } // Create the availableRegions lists int32_t pos = -1; while ( const UHashElement* element = uhash_nextElement(regionIDMap,&pos)) { Region *ar = (Region *)element->value.pointer; if ( availableRegions[ar->type] == NULL ) { availableRegions[ar->type] = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); } UnicodeString *arString = new UnicodeString(ar->idStr); availableRegions[ar->type]->addElement((void *)arString,status); } ures_close(territoryContainment); ures_close(worldContainment); ures_close(groupingContainment); ures_close(codeMappings); ures_close(rb2); ures_close(territoryAlias); ures_close(regionCodes); ures_close(rb); delete df; ucln_i18n_registerCleanup(UCLN_I18N_REGION, region_cleanup); regionDataIsLoaded = true; umtx_unlock(&gRegionDataLock); }
virtual const UnicodeString* snext(UErrorCode& status) { if (upToDate(status) && (_pos < _ids.size())) { return (const UnicodeString*)_ids[_pos++]; } return NULL; }
static unsigned cookie_split(UVector<UString>& vec, const UString& buffer, const char* delim) { U_TRACE(5, "cookie_split(%p,%.*S,%S)", &vec, U_STRING_TO_TRACE(buffer), delim) UString x; unsigned r, n = vec.size(); const char* s = buffer.data(); const char* ss = s; const char* end = s + buffer.size(); const char* p; const char* b = s; loop: if (s >= end) goto done; if (strchr(delim, *s)) { ++s; goto loop; } else { while (isspace(*s)) s++; p = s++; if (*(s-1) == '"') { while (s < end && *s != '"') ++s; } ss = s; while (s < end && strchr(delim,*s) == 0) { ++s; if (!isspace(*(s-1))) ss = s; } } if (*p == '"' && *(ss-1) == '"') { p++; ss--; } x = buffer.substr(p - b, ss - p); vec.push_back(x); ++s; goto loop; done: r = vec.size() - n; #ifdef DEBUG for (unsigned i = 0; i < r; ++i) { U_DUMP("vec[%d] = %.*S", n+i, U_STRING_TO_TRACE(vec[n+i])) } #endif U_RETURN(r); }