/** * Returns an enumeration over the IDs of all the regions that are children of this region anywhere in the region * hierarchy and match the given type. This API may return an empty enumeration if this region doesn't have any * sub-regions that match the given type. For example, calling this method with region "150" (Europe) and type * "URGN_TERRITORY" returns a set containing all the territories in Europe ( "FR" (France) - "IT" (Italy) - "DE" (Germany) etc. ) */ StringEnumeration* Region::getContainedRegions( URegionType type, UErrorCode &status ) const { umtx_initOnce(gRegionDataInitOnce, &loadRegionData, status); // returns immediately if U_FAILURE(status) if (U_FAILURE(status)) { return NULL; } UVector *result = new UVector(NULL, uhash_compareChars, status); StringEnumeration *cr = getContainedRegions(status); for ( int32_t i = 0 ; i < cr->count(status) ; i++ ) { const char *id = cr->next(NULL,status); const Region *r = Region::getInstance(id,status); if ( r->getType() == type ) { result->addElement((void *)&r->idStr,status); } else { StringEnumeration *children = r->getContainedRegions(type, status); for ( int32_t j = 0 ; j < children->count(status) ; j++ ) { const char *id2 = children->next(NULL,status); const Region *r2 = Region::getInstance(id2,status); result->addElement((void *)&r2->idStr,status); } delete children; } } delete cr; StringEnumeration* resultEnumeration = new RegionNameEnumeration(result,status); delete result; return resultEnumeration; }
/** * Returns an enumeration over the IDs of all the regions that are children of this region anywhere in the region * hierarchy and match the given type. This API may return an empty enumeration if this region doesn't have any * sub-regions that match the given type. For example, calling this method with region "150" (Europe) and type * "URGN_TERRITORY" returns a set containing all the territories in Europe ( "FR" (France) - "IT" (Italy) - "DE" (Germany) etc. ) */ StringEnumeration* Region::getContainedRegions( URegionType type ) const { loadRegionData(); UErrorCode status = U_ZERO_ERROR; UVector *result = new UVector(NULL, uhash_compareChars, status); StringEnumeration *cr = getContainedRegions(); for ( int32_t i = 0 ; i < cr->count(status) ; i++ ) { const char *id = cr->next(NULL,status); const Region *r = Region::getInstance(id,status); if ( r->getType() == type ) { result->addElement((void *)&r->idStr,status); } else { StringEnumeration *children = r->getContainedRegions(type); for ( int32_t j = 0 ; j < children->count(status) ; j++ ) { const char *id2 = children->next(NULL,status); const Region *r2 = Region::getInstance(id2,status); result->addElement((void *)&r2->idStr,status); } delete children; } } delete cr; StringEnumeration* resultEnumeration = new RegionNameEnumeration(result,status); delete result; return resultEnumeration; }
void CompactData::getUniquePatterns(UVector &output, UErrorCode &status) const { U_ASSERT(output.isEmpty()); // NOTE: In C++, this is done more manually with a UVector. // In Java, we can take advantage of JDK HashSet. for (auto pattern : patterns) { if (pattern == nullptr || pattern == USE_FALLBACK) { continue; } // Insert pattern into the UVector if the UVector does not already contain the pattern. // Search the UVector from the end since identical patterns are likely to be adjacent. for (int32_t i = output.size() - 1; i >= 0; i--) { if (u_strcmp(pattern, static_cast<const UChar *>(output[i])) == 0) { goto continue_outer; } } // The string was not found; add it to the UVector. // ANDY: This requires a const_cast. Why? output.addElement(const_cast<UChar *>(pattern), status); continue_outer: continue; } }
UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) { if (U_FAILURE(status)) { return NULL; } UVector *dest = new UVector(status); if (dest == NULL) { status = U_MEMORY_ALLOCATION_ERROR; return NULL; } dest->setDeleter(uprv_deleteUObject); const UChar *src = HACK_FIRST_CHARS_IN_SCRIPTS; const UChar *limit = src + LENGTHOF(HACK_FIRST_CHARS_IN_SCRIPTS); do { if (U_FAILURE(status)) { return dest; } UnicodeString *str = new UnicodeString(src, -1); if (str == NULL) { status = U_MEMORY_ALLOCATION_ERROR; return dest; } dest->addElement(str, status); src += str->length() + 1; } while (src < limit); return dest; }
void CharacterNode::addValue(void *value, UObjectDeleter *valueDeleter, UErrorCode &status) { if (U_FAILURE(status)) { if (valueDeleter) { valueDeleter(value); } return; } if (fValues == NULL) { fValues = value; } else { // At least one value already. if (!fHasValuesVector) { // There is only one value so far, and not in a vector yet. // Create a vector and add the old value. UVector *values = new UVector(valueDeleter, NULL, DEFAULT_CHARACTERNODE_CAPACITY, status); if (U_FAILURE(status)) { if (valueDeleter) { valueDeleter(value); } return; } values->addElement(fValues, status); fValues = values; fHasValuesVector = TRUE; } // Add the new value. ((UVector *)fValues)->addElement(value, status); } }
UVector* RuleBasedTimeZone::copyRules(UVector* source) { if (source == NULL) { return NULL; } UErrorCode ec = U_ZERO_ERROR; int32_t size = source->size(); UVector *rules = new UVector(size, ec); if (U_FAILURE(ec)) { return NULL; } int32_t i; for (i = 0; i < size; i++) { rules->addElement(((TimeZoneRule*)source->elementAt(i))->clone(), ec); if (U_FAILURE(ec)) { break; } } if (U_FAILURE(ec)) { // In case of error, clean up for (i = 0; i < rules->size(); i++) { TimeZoneRule *rule = (TimeZoneRule*)rules->orphanElementAt(i); delete rule; } delete rules; return NULL; } return rules; }
StringEnumeration* TimeZoneNamesImpl::getAvailableMetaZoneIDs(const UnicodeString& tzID, UErrorCode& status) const { if (U_FAILURE(status)) { return NULL; } const UVector* mappings = ZoneMeta::getMetazoneMappings(tzID); if (mappings == NULL) { return new MetaZoneIDsEnumeration(); } MetaZoneIDsEnumeration *senum = NULL; UVector* mzIDs = new UVector(NULL, uhash_compareUChars, status); if (mzIDs == NULL) { status = U_MEMORY_ALLOCATION_ERROR; } if (U_SUCCESS(status)) { U_ASSERT(mzIDs != NULL); for (int32_t i = 0; U_SUCCESS(status) && i < mappings->size(); i++) { OlsonToMetaMappingEntry *map = (OlsonToMetaMappingEntry *)mappings->elementAt(i); const UChar *mzID = map->mzid; if (!mzIDs->contains((void *)mzID)) { mzIDs->addElement((void *)mzID, status); } } if (U_SUCCESS(status)) { senum = new MetaZoneIDsEnumeration(mzIDs); } else { delete mzIDs; } } return senum; }
static void appendUnicodeSetToUVector(UVector &dest, const UnicodeSet &source, UErrorCode &status) { UnicodeSetIterator setIter(source); while (setIter.next()) { const UnicodeString &str = setIter.getString(); dest.addElement(str.clone(), status); } }
UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) { if (U_FAILURE(status)) { return NULL; } UVector *dest = new UVector(status); if (dest == NULL) { if (U_SUCCESS(status)) { status = U_MEMORY_ALLOCATION_ERROR; } return NULL; } dest->setDeleter(uprv_deleteUObject); const UChar *src = HACK_FIRST_CHARS_IN_SCRIPTS; const UChar *limit = src + sizeof(HACK_FIRST_CHARS_IN_SCRIPTS) / sizeof(HACK_FIRST_CHARS_IN_SCRIPTS[0]); do { if (U_FAILURE(status)) { return dest; } UnicodeString *str = new UnicodeString(src, -1); if (str == NULL) { status = U_MEMORY_ALLOCATION_ERROR; } else { dest->addElement(str, status); src += str->length() + 1; } } while (src < limit); dest->sortWithUComparator(sortCollateComparator, collator_, status); return dest; }
TestMultipleKeyStringFactory(const UnicodeString ids[], int32_t count, const UnicodeString& factoryID) : _status(U_ZERO_ERROR) , _ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, count, _status) , _factoryID(factoryID + ": ") { for (int i = 0; i < count; ++i) { _ids.addElement(new UnicodeString(ids[i]), _status); } }
UBool GNameSearchHandler::handleMatch(int32_t matchLength, const CharacterNode *node, UErrorCode &status) { if (U_FAILURE(status)) { return FALSE; } if (node->hasValues()) { int32_t valuesCount = node->countValues(); for (int32_t i = 0; i < valuesCount; i++) { GNameInfo *nameinfo = (ZNameInfo *)node->getValue(i); if (nameinfo == NULL) { break; } if ((nameinfo->type & fTypes) != 0) { // matches a requested type if (fResults == NULL) { fResults = new UVector(uprv_free, NULL, status); if (fResults == NULL) { status = U_MEMORY_ALLOCATION_ERROR; } } if (U_SUCCESS(status)) { U_ASSERT(fResults != NULL); GMatchInfo *gmatch = (GMatchInfo *)uprv_malloc(sizeof(GMatchInfo)); if (gmatch == NULL) { status = U_MEMORY_ALLOCATION_ERROR; } else { // add the match to the vector gmatch->gnameInfo = nameinfo; gmatch->matchLength = matchLength; gmatch->timeType = UTZFMT_TIME_TYPE_UNKNOWN; fResults->addElement(gmatch, status); if (U_FAILURE(status)) { uprv_free(gmatch); } else { if (matchLength > fMaxMatchLen) { fMaxMatchLen = matchLength; } } } } } } } return TRUE; }
ServiceEnumeration(const ServiceEnumeration &other, UErrorCode &status) : _service(other._service) , _timestamp(other._timestamp) , _ids(uhash_deleteUnicodeString, NULL, status) , _pos(0) { if(U_SUCCESS(status)) { int32_t i, length; length = other._ids.size(); for(i = 0; i < length; ++i) { _ids.addElement(((UnicodeString *)other._ids.elementAt(i))->clone(), status); } if(U_SUCCESS(status)) { _pos = other._pos; } } }
/** * Register a source-target/variant in the specDAG. Variant may be * empty, but source and target must not be. If variant is empty then * the special variant NO_VARIANT is stored in slot zero of the * UVector of variants. */ void TransliteratorRegistry::registerSTV(const UnicodeString& source, const UnicodeString& target, const UnicodeString& variant) { // assert(source.length() > 0); // assert(target.length() > 0); UErrorCode status = U_ZERO_ERROR; Hashtable *targets = (Hashtable*) specDAG.get(source); if (targets == 0) { targets = new Hashtable(TRUE, status); if (U_FAILURE(status) || targets == 0) { return; } targets->setValueDeleter(uprv_deleteUObject); specDAG.put(source, targets, status); } UVector *variants = (UVector*) targets->get(target); if (variants == 0) { variants = new UVector(uprv_deleteUObject, uhash_compareCaselessUnicodeString, status); if (variants == 0) { return; } targets->put(target, variants, status); } // assert(NO_VARIANT == ""); // We add the variant string. If it is the special "no variant" // string, that is, the empty string, we add it at position zero. if (!variants->contains((void*) &variant)) { UnicodeString *tempus; // Used for null pointer check. if (variant.length() > 0) { tempus = new UnicodeString(variant); if (tempus != NULL) { variants->addElement(tempus, status); } } else { tempus = new UnicodeString(); // = NO_VARIANT if (tempus != NULL) { variants->insertElementAt(tempus, 0, status); } } } }
U_CDECL_END /** * Parse a compound ID, consisting of an optional forward global * filter, a separator, one or more single IDs delimited by * separators, an an optional reverse global filter. The * separator is a semicolon. The global filters are UnicodeSet * patterns. The reverse global filter must be enclosed in * parentheses. * @param id the pattern the parse * @param dir the direction. * @param canonID OUTPUT parameter that receives the canonical ID, * consisting of canonical IDs for all elements, as returned by * parseSingleID(), separated by semicolons. Previous contents * are discarded. * @param list OUTPUT parameter that receives a list of SingleID * objects representing the parsed IDs. Previous contents are * discarded. * @param globalFilter OUTPUT parameter that receives a pointer to * a newly created global filter for this ID in this direction, or * NULL if there is none. * @return TRUE if the parse succeeds, that is, if the entire * id is consumed without syntax error. */ UBool TransliteratorIDParser::parseCompoundID(const UnicodeString & id, int32_t dir, UnicodeString & canonID, UVector & list, UnicodeSet *& globalFilter) { UErrorCode ec = U_ZERO_ERROR; int32_t i; int32_t pos = 0; int32_t withParens = 1; list.removeAllElements(); UnicodeSet * filter; globalFilter = NULL; canonID.truncate(0); // Parse leading global filter, if any withParens = 0; // parens disallowed filter = parseGlobalFilter(id, pos, dir, withParens, &canonID); if (filter != NULL) { if (!ICU_Utility::parseChar(id, pos, ID_DELIM)) { // Not a global filter; backup and resume canonID.truncate(0); pos = 0; } if (dir == FORWARD) { globalFilter = filter; } else { delete filter; } filter = NULL; } UBool sawDelimiter = TRUE; for (;;) { SingleID * single = parseSingleID(id, pos, dir, ec); if (single == NULL) { break; } if (dir == FORWARD) { list.addElement(single, ec); } else { list.insertElementAt(single, 0, ec); } if (U_FAILURE(ec)) { goto FAIL; } if (!ICU_Utility::parseChar(id, pos, ID_DELIM)) { sawDelimiter = FALSE; break; } } if (list.size() == 0) { goto FAIL; } // Construct canonical ID for (i = 0; i < list.size(); ++i) { SingleID * single = (SingleID *) list.elementAt(i); canonID.append(single->canonID); if (i != (list.size() - 1)) { canonID.append(ID_DELIM); } } // Parse trailing global filter, if any, and only if we saw // a trailing delimiter after the IDs. if (sawDelimiter) { withParens = 1; // parens required filter = parseGlobalFilter(id, pos, dir, withParens, &canonID); if (filter != NULL) { // Don't require trailing ';', but parse it if present ICU_Utility::parseChar(id, pos, ID_DELIM); if (dir == REVERSE) { globalFilter = filter; } else { delete filter; } filter = NULL; } } // Trailing unparsed text is a syntax error ICU_Utility::skipWhitespace(id, pos, TRUE); if (pos != id.length()) { goto FAIL; } return TRUE; FAIL: UObjectDeleter * save = list.setDeleter(_deleteSingleID); list.removeAllElements(); list.setDeleter(save); delete globalFilter; globalFilter = NULL; return FALSE; }
UVector* ZoneMeta::createMetazoneMappings(const UnicodeString &tzid) { UVector *mzMappings = NULL; UErrorCode status = U_ZERO_ERROR; UnicodeString canonicalID; UResourceBundle *rb = ures_openDirect(NULL, gMetaZones, &status); ures_getByKey(rb, gMetazoneInfo, rb, &status); TimeZone::getCanonicalID(tzid, canonicalID, status); if (U_SUCCESS(status)) { char tzKey[ZID_KEY_MAX]; canonicalID.extract(0, canonicalID.length(), tzKey, sizeof(tzKey), US_INV); // tzid keys are using ':' as separators char *p = tzKey; while (*p) { if (*p == '/') { *p = ':'; } p++; } ures_getByKey(rb, tzKey, rb, &status); if (U_SUCCESS(status)) { UResourceBundle *mz = NULL; while (ures_hasNext(rb)) { mz = ures_getNextResource(rb, mz, &status); const UChar *mz_name = ures_getStringByIndex(mz, 0, NULL, &status); const UChar *mz_from = gDefaultFrom; const UChar *mz_to = gDefaultTo; if (ures_getSize(mz) == 3) { mz_from = ures_getStringByIndex(mz, 1, NULL, &status); mz_to = ures_getStringByIndex(mz, 2, NULL, &status); } if(U_FAILURE(status)){ status = U_ZERO_ERROR; continue; } // We do not want to use SimpleDateformat to parse boundary dates, // because this code could be triggered by the initialization code // used by SimpleDateFormat. UDate from = parseDate(mz_from, status); UDate to = parseDate(mz_to, status); if (U_FAILURE(status)) { status = U_ZERO_ERROR; continue; } OlsonToMetaMappingEntry *entry = (OlsonToMetaMappingEntry*)uprv_malloc(sizeof(OlsonToMetaMappingEntry)); if (entry == NULL) { status = U_MEMORY_ALLOCATION_ERROR; break; } entry->mzid = mz_name; entry->from = from; entry->to = to; if (mzMappings == NULL) { mzMappings = new UVector(deleteOlsonToMetaMappingEntry, NULL, status); if (U_FAILURE(status)) { delete mzMappings; deleteOlsonToMetaMappingEntry(entry); uprv_free(entry); break; } } mzMappings->addElement(entry, status); if (U_FAILURE(status)) { break; } } ures_close(mz); if (U_FAILURE(status)) { if (mzMappings != NULL) { delete mzMappings; mzMappings = NULL; } } } } ures_close(rb); return mzMappings; }
void BasicTimeZone::getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, UVector*& transitionRules, UErrorCode& status) /*const*/ { if (U_FAILURE(status)) { return; } const InitialTimeZoneRule *orgini; const TimeZoneRule **orgtrs = NULL; TimeZoneTransition tzt; UBool avail; UVector *orgRules = NULL; int32_t ruleCount; TimeZoneRule *r = NULL; UBool *done = NULL; InitialTimeZoneRule *res_initial = NULL; UVector *filteredRules = NULL; UnicodeString name; int32_t i; UDate time, t; UDate *newTimes = NULL; UDate firstStart; UBool bFinalStd = FALSE, bFinalDst = FALSE; // Original transition rules ruleCount = countTransitionRules(status); if (U_FAILURE(status)) { return; } orgRules = new UVector(ruleCount, status); if (U_FAILURE(status)) { return; } orgtrs = (const TimeZoneRule**)uprv_malloc(sizeof(TimeZoneRule*)*ruleCount); if (orgtrs == NULL) { status = U_MEMORY_ALLOCATION_ERROR; goto error; } getTimeZoneRules(orgini, orgtrs, ruleCount, status); if (U_FAILURE(status)) { goto error; } for (i = 0; i < ruleCount; i++) { orgRules->addElement(orgtrs[i]->clone(), status); if (U_FAILURE(status)) { goto error; } } uprv_free(orgtrs); orgtrs = NULL; avail = getPreviousTransition(start, TRUE, tzt); if (!avail) { // No need to filter out rules only applicable to time before the start initial = orgini->clone(); transitionRules = orgRules; return; } done = (UBool*)uprv_malloc(sizeof(UBool)*ruleCount); if (done == NULL) { status = U_MEMORY_ALLOCATION_ERROR; goto error; } filteredRules = new UVector(status); if (U_FAILURE(status)) { goto error; } // Create initial rule tzt.getTo()->getName(name); res_initial = new InitialTimeZoneRule(name, tzt.getTo()->getRawOffset(), tzt.getTo()->getDSTSavings()); // Mark rules which does not need to be processed for (i = 0; i < ruleCount; i++) { r = (TimeZoneRule*)orgRules->elementAt(i); avail = r->getNextStart(start, res_initial->getRawOffset(), res_initial->getDSTSavings(), FALSE, time); done[i] = !avail; } time = start; while (!bFinalStd || !bFinalDst) { avail = getNextTransition(time, FALSE, tzt); if (!avail) { break; } UDate updatedTime = tzt.getTime(); if (updatedTime == time) { // Can get here if rules for start & end of daylight time have exactly // the same time. // TODO: fix getNextTransition() to prevent it? status = U_INVALID_STATE_ERROR; goto error; } time = updatedTime; const TimeZoneRule *toRule = tzt.getTo(); for (i = 0; i < ruleCount; i++) { r = (TimeZoneRule*)orgRules->elementAt(i); if (*r == *toRule) { break; } } if (i >= ruleCount) { // This case should never happen status = U_INVALID_STATE_ERROR; goto error; } if (done[i]) { continue; } const TimeArrayTimeZoneRule *tar = dynamic_cast<const TimeArrayTimeZoneRule *>(toRule); const AnnualTimeZoneRule *ar; if (tar != NULL) { // Get the previous raw offset and DST savings before the very first start time TimeZoneTransition tzt0; t = start; while (TRUE) { avail = getNextTransition(t, FALSE, tzt0); if (!avail) { break; } if (*(tzt0.getTo()) == *tar) { break; } t = tzt0.getTime(); } if (avail) { // Check if the entire start times to be added tar->getFirstStart(tzt.getFrom()->getRawOffset(), tzt.getFrom()->getDSTSavings(), firstStart); if (firstStart > start) { // Just add the rule as is filteredRules->addElement(tar->clone(), status); if (U_FAILURE(status)) { goto error; } } else { // Colllect transitions after the start time int32_t startTimes; DateTimeRule::TimeRuleType timeType; int32_t idx; startTimes = tar->countStartTimes(); timeType = tar->getTimeType(); for (idx = 0; idx < startTimes; idx++) { tar->getStartTimeAt(idx, t); if (timeType == DateTimeRule::STANDARD_TIME) { t -= tzt.getFrom()->getRawOffset(); } if (timeType == DateTimeRule::WALL_TIME) { t -= tzt.getFrom()->getDSTSavings(); } if (t > start) { break; } } int32_t asize = startTimes - idx; if (asize > 0) { newTimes = (UDate*)uprv_malloc(sizeof(UDate) * asize); if (newTimes == NULL) { status = U_MEMORY_ALLOCATION_ERROR; goto error; } for (int32_t newidx = 0; newidx < asize; newidx++) { tar->getStartTimeAt(idx + newidx, newTimes[newidx]); if (U_FAILURE(status)) { uprv_free(newTimes); newTimes = NULL; goto error; } } tar->getName(name); TimeArrayTimeZoneRule *newTar = new TimeArrayTimeZoneRule(name, tar->getRawOffset(), tar->getDSTSavings(), newTimes, asize, timeType); uprv_free(newTimes); filteredRules->addElement(newTar, status); if (U_FAILURE(status)) { goto error; } } } } } else if ((ar = dynamic_cast<const AnnualTimeZoneRule *>(toRule)) != NULL) { ar->getFirstStart(tzt.getFrom()->getRawOffset(), tzt.getFrom()->getDSTSavings(), firstStart); if (firstStart == tzt.getTime()) { // Just add the rule as is filteredRules->addElement(ar->clone(), status); if (U_FAILURE(status)) { goto error; } } else { // Calculate the transition year int32_t year, month, dom, dow, doy, mid; Grego::timeToFields(tzt.getTime(), year, month, dom, dow, doy, mid); // Re-create the rule ar->getName(name); AnnualTimeZoneRule *newAr = new AnnualTimeZoneRule(name, ar->getRawOffset(), ar->getDSTSavings(), *(ar->getRule()), year, ar->getEndYear()); filteredRules->addElement(newAr, status); if (U_FAILURE(status)) { goto error; } } // check if this is a final rule if (ar->getEndYear() == AnnualTimeZoneRule::MAX_YEAR) { // After bot final standard and dst rules are processed, // exit this while loop. if (ar->getDSTSavings() == 0) { bFinalStd = TRUE; } else { bFinalDst = TRUE; } } } done[i] = TRUE; } // Set the results if (orgRules != NULL) { while (!orgRules->isEmpty()) { r = (TimeZoneRule*)orgRules->orphanElementAt(0); delete r; } delete orgRules; } if (done != NULL) { uprv_free(done); } initial = res_initial; transitionRules = filteredRules; return; error: if (orgtrs != NULL) { uprv_free(orgtrs); } if (orgRules != NULL) { while (!orgRules->isEmpty()) { r = (TimeZoneRule*)orgRules->orphanElementAt(0); delete r; } delete orgRules; } if (done != NULL) { if (filteredRules != NULL) { while (!filteredRules->isEmpty()) { r = (TimeZoneRule*)filteredRules->orphanElementAt(0); delete r; } delete filteredRules; } delete res_initial; uprv_free(done); } initial = NULL; transitionRules = NULL; }
UHashtable* ZoneMeta::createMetaToOlsonMap(void) { UErrorCode status = U_ZERO_ERROR; UHashtable *metaToOlson = NULL; UResourceBundle *metazones = NULL; UResourceBundle *mz = NULL; metaToOlson = uhash_open(uhash_hashUChars, uhash_compareUChars, NULL, &status); if (U_FAILURE(status)) { return NULL; } uhash_setKeyDeleter(metaToOlson, deleteUCharString); uhash_setValueDeleter(metaToOlson, deleteUVector); metazones = ures_openDirect(NULL, gSupplementalData, &status); metazones = ures_getByKey(metazones, gMapTimezonesTag, metazones, &status); metazones = ures_getByKey(metazones, gMetazonesTag, metazones, &status); if (U_FAILURE(status)) { goto error_cleanup; } while (ures_hasNext(metazones)) { mz = ures_getNextResource(metazones, mz, &status); if (U_FAILURE(status)) { status = U_ZERO_ERROR; continue; } const char *mzkey = ures_getKey(mz); if (uprv_strncmp(mzkey, gMetazoneIdPrefix, MZID_PREFIX_LEN) == 0) { const char *mzid = mzkey + MZID_PREFIX_LEN; const char *territory = uprv_strrchr(mzid, '_'); int32_t mzidLen = 0; int32_t territoryLen = 0; if (territory) { mzidLen = territory - mzid; territory++; territoryLen = uprv_strlen(territory); } if (mzidLen > 0 && territoryLen > 0) { int32_t tzidLen; const UChar *tzid = ures_getStringByIndex(mz, 0, &tzidLen, &status); if (U_SUCCESS(status)) { // Create MetaToOlsonMappingEntry MetaToOlsonMappingEntry *entry = (MetaToOlsonMappingEntry*)uprv_malloc(sizeof(MetaToOlsonMappingEntry)); if (entry == NULL) { status = U_MEMORY_ALLOCATION_ERROR; goto error_cleanup; } entry->id = tzid; entry->territory = (UChar*)uprv_malloc((territoryLen + 1) * sizeof(UChar)); if (entry->territory == NULL) { status = U_MEMORY_ALLOCATION_ERROR; uprv_free(entry); goto error_cleanup; } u_charsToUChars(territory, entry->territory, territoryLen + 1); // Check if mapping entries for metazone is already available if (mzidLen < ZID_KEY_MAX) { UChar mzidUChars[ZID_KEY_MAX]; u_charsToUChars(mzid, mzidUChars, mzidLen); mzidUChars[mzidLen++] = 0; // Add NUL terminator UVector *tzMappings = (UVector*)uhash_get(metaToOlson, mzidUChars); if (tzMappings == NULL) { // Create new UVector and put it into the hashtable tzMappings = new UVector(deleteMetaToOlsonMappingEntry, NULL, status); if (U_FAILURE(status)) { deleteMetaToOlsonMappingEntry(entry); goto error_cleanup; } UChar *key = (UChar*)uprv_malloc(mzidLen * sizeof(UChar)); if (key == NULL) { status = U_MEMORY_ALLOCATION_ERROR; delete tzMappings; deleteMetaToOlsonMappingEntry(entry); goto error_cleanup; } u_strncpy(key, mzidUChars, mzidLen); uhash_put(metaToOlson, key, tzMappings, &status); if (U_FAILURE(status)) { goto error_cleanup; } } tzMappings->addElement(entry, status); if (U_FAILURE(status)) { goto error_cleanup; } } else { deleteMetaToOlsonMappingEntry(entry); } } else { status = U_ZERO_ERROR; } } } } normal_cleanup: ures_close(mz); ures_close(metazones); return metaToOlson; error_cleanup: if (metaToOlson != NULL) { uhash_close(metaToOlson); metaToOlson = NULL; } goto normal_cleanup; }
// // First characters in scripts. // Create a UVector whose contents are pointers to UnicodeStrings for the First Characters in each script. // The vector is sorted according to this index's collation. // // This code is too slow to use, so for now hard code the data. // Hard coded implementation is follows. // UVector *AlphabeticIndex::firstStringsInScript(Collator *ruleBasedCollator, UErrorCode &status) { if (U_FAILURE(status)) { return NULL; } UnicodeString results[USCRIPT_CODE_LIMIT]; UnicodeString LOWER_A = UNICODE_STRING_SIMPLE("a"); UnicodeSetIterator siter(*TO_TRY); while (siter.next()) { const UnicodeString ¤t = siter.getString(); Collator::EComparisonResult r = ruleBasedCollator->compare(current, LOWER_A); if (r < 0) { // TODO fix; we only want "real" script characters, not // symbols. continue; } int script = uscript_getScript(current.char32At(0), &status); if (results[script].length() == 0) { results[script] = current; } else if (ruleBasedCollator->compare(current, results[script]) < 0) { results[script] = current; } } UnicodeSet extras; UnicodeSet expansions; RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(ruleBasedCollator); const UCollator *uRuleBasedCollator = rbc->getUCollator(); ucol_getContractionsAndExpansions(uRuleBasedCollator, extras.toUSet(), expansions.toUSet(), true, &status); extras.addAll(expansions).removeAll(*TO_TRY); if (extras.size() != 0) { const Normalizer2 *normalizer = Normalizer2::getNFKCInstance(status); UnicodeSetIterator extrasIter(extras); while (extrasIter.next()) { const UnicodeString ¤t = extrasIter.next(); if (!TO_TRY->containsAll(current)) continue; if (!normalizer->isNormalized(current, status) || ruleBasedCollator->compare(current, LOWER_A) < 0) { continue; } int script = uscript_getScript(current.char32At(0), &status); if (results[script].length() == 0) { results[script] = current; } else if (ruleBasedCollator->compare(current, results[script]) < 0) { results[script] = current; } } } UVector *dest = new UVector(status); dest->setDeleter(uprv_deleteUObject); for (uint32_t i = 0; i < sizeof(results) / sizeof(results[0]); ++i) { if (results[i].length() > 0) { dest->addElement(results[i].clone(), status); } } dest->sortWithUComparator(sortCollateComparator, ruleBasedCollator, status); return dest; }
void AlphabeticIndex::buildIndex(UErrorCode &status) { if (U_FAILURE(status)) { return; } if (!indexBuildRequired_) { return; } // Discard any already-built data. // This is important when the user builds and uses an index, then subsequently modifies it, // necessitating a rebuild. bucketList_->removeAllElements(); labels_->removeAllElements(); uhash_removeAll(alreadyIn_); noDistinctSorting_->clear(); notAlphabetic_->clear(); // first sort the incoming Labels, with a "best" ordering among items // that are the same according to the collator UVector preferenceSorting(status); // Vector of UnicodeStrings; owned by the vector. preferenceSorting.setDeleter(uprv_deleteUObject); appendUnicodeSetToUVector(preferenceSorting, *initialLabels_, status); preferenceSorting.sortWithUComparator(PreferenceComparator, &status, status); // We now make a set of Labels. // Some of the input may, however, be redundant. // That is, we might have c, ch, d, where "ch" sorts just like "c", "h" // So we make a pass through, filtering out those cases. // TODO: filtering these out would seem to be at odds with the eventual goal // of being able to split buckets that contain too many items. UnicodeSet labelSet; for (int32_t psIndex=0; psIndex<preferenceSorting.size(); psIndex++) { UnicodeString item = *static_cast<const UnicodeString *>(preferenceSorting.elementAt(psIndex)); // TODO: Since preferenceSorting was originally populated from the contents of a UnicodeSet, // is it even possible for duplicates to show up in this check? if (labelSet.contains(item)) { UnicodeSetIterator itemAlreadyInIter(labelSet); while (itemAlreadyInIter.next()) { const UnicodeString &itemAlreadyIn = itemAlreadyInIter.getString(); if (collatorPrimaryOnly_->compare(item, itemAlreadyIn) == 0) { UnicodeSet *targets = static_cast<UnicodeSet *>(uhash_get(alreadyIn_, &itemAlreadyIn)); if (targets == NULL) { // alreadyIn.put(itemAlreadyIn, targets = new LinkedHashSet<String>()); targets = new UnicodeSet(); uhash_put(alreadyIn_, itemAlreadyIn.clone(), targets, &status); } targets->add(item); break; } } } else if (item.moveIndex32(0, 1) < item.length() && // Label contains more than one code point. collatorPrimaryOnly_->compare(item, separated(item)) == 0) { noDistinctSorting_->add(item); } else if (!ALPHABETIC->containsSome(item)) { notAlphabetic_->add(item); } else { labelSet.add(item); } } // If we have no labels, hard-code a fallback default set of [A-Z] // This case can occur with locales that don't have exemplar character data, including root. // A no-labels situation will cause other problems; it needs to be avoided. if (labelSet.isEmpty()) { labelSet.add((UChar32)0x41, (UChar32)0x5A); } // Move the set of Labels from the set into a vector, and sort // according to the collator. appendUnicodeSetToUVector(*labels_, labelSet, status); labels_->sortWithUComparator(sortCollateComparator, collatorPrimaryOnly_, status); // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element // Implemented by copying the elements to be retained to a new UVector. const int32_t size = labelSet.size() - 1; if (size > maxLabelCount_) { UVector *newLabels = new UVector(status); newLabels->setDeleter(uprv_deleteUObject); int32_t count = 0; int32_t old = -1; for (int32_t srcIndex=0; srcIndex<labels_->size(); srcIndex++) { const UnicodeString *str = static_cast<const UnicodeString *>(labels_->elementAt(srcIndex)); ++count; const int32_t bump = count * maxLabelCount_ / size; if (bump == old) { // it.remove(); } else { newLabels->addElement(str->clone(), status); old = bump; } } delete labels_; labels_ = newLabels; } // We now know the list of labels. // Create a corresponding list of buckets, one per label. buildBucketList(status); // Corresponds to Java BucketList constructor. // Bin the Records into the Buckets. bucketRecords(status); indexBuildRequired_ = FALSE; resetBucketIterator(status); }
// Build the Whole Script Confusable data // // TODO: Reorganize. Either get rid of the WSConfusableDataBuilder class, // because everything is local to this one build function anyhow, // OR // break this function into more reasonably sized pieces, with // state in WSConfusableDataBuilder. // void buildWSConfusableData(SpoofImpl *spImpl, const char * confusablesWS, int32_t confusablesWSLen, UParseError *pe, UErrorCode &status) { if (U_FAILURE(status)) { return; } URegularExpression *parseRegexp = NULL; int32_t inputLen = 0; UChar *input = NULL; int32_t lineNum = 0; UVector *scriptSets = NULL; uint32_t rtScriptSetsCount = 2; UTrie2 *anyCaseTrie = NULL; UTrie2 *lowerCaseTrie = NULL; anyCaseTrie = utrie2_open(0, 0, &status); lowerCaseTrie = utrie2_open(0, 0, &status); // The scriptSets vector provides a mapping from TRIE values to the set of scripts. // // Reserved TRIE values: // 0: Code point has no whole script confusables. // 1: Code point is of script Common or Inherited. // These code points do not participate in whole script confusable detection. // (This is logically equivalent to saying that they contain confusables in // all scripts) // // Because Trie values are indexes into the ScriptSets vector, pre-fill // vector positions 0 and 1 to avoid conflicts with the reserved values. scriptSets = new UVector(status); if (scriptSets == NULL) { status = U_MEMORY_ALLOCATION_ERROR; goto cleanup; } scriptSets->addElement((void *)NULL, status); scriptSets->addElement((void *)NULL, status); // Convert the user input data from UTF-8 to UChar (UTF-16) u_strFromUTF8(NULL, 0, &inputLen, confusablesWS, confusablesWSLen, &status); if (status != U_BUFFER_OVERFLOW_ERROR) { goto cleanup; } status = U_ZERO_ERROR; input = static_cast<UChar *>(uprv_malloc((inputLen+1) * sizeof(UChar))); if (input == NULL) { status = U_MEMORY_ALLOCATION_ERROR; goto cleanup; } u_strFromUTF8(input, inputLen+1, NULL, confusablesWS, confusablesWSLen, &status); parseRegexp = uregex_openC(parseExp, 0, NULL, &status); // Zap any Byte Order Mark at the start of input. Changing it to a space is benign // given the syntax of the input. if (*input == 0xfeff) { *input = 0x20; } // Parse the input, one line per iteration of this loop. uregex_setText(parseRegexp, input, inputLen, &status); while (uregex_findNext(parseRegexp, &status)) { lineNum++; UChar line[200]; uregex_group(parseRegexp, 0, line, 200, &status); if (uregex_start(parseRegexp, 1, &status) >= 0) { // this was a blank or comment line. continue; } if (uregex_start(parseRegexp, 8, &status) >= 0) { // input file syntax error. status = U_PARSE_ERROR; goto cleanup; } if (U_FAILURE(status)) { goto cleanup; } // Pick up the start and optional range end code points from the parsed line. UChar32 startCodePoint = SpoofImpl::ScanHex( input, uregex_start(parseRegexp, 2, &status), uregex_end(parseRegexp, 2, &status), status); UChar32 endCodePoint = startCodePoint; if (uregex_start(parseRegexp, 3, &status) >=0) { endCodePoint = SpoofImpl::ScanHex( input, uregex_start(parseRegexp, 3, &status), uregex_end(parseRegexp, 3, &status), status); } // Extract the two script names from the source line. We need these in an 8 bit // default encoding (will be EBCDIC on IBM mainframes) in order to pass them on // to the ICU u_getPropertyValueEnum() function. Ugh. char srcScriptName[20]; char targScriptName[20]; extractGroup(parseRegexp, 4, srcScriptName, sizeof(srcScriptName), status); extractGroup(parseRegexp, 5, targScriptName, sizeof(targScriptName), status); UScriptCode srcScript = static_cast<UScriptCode>(u_getPropertyValueEnum(UCHAR_SCRIPT, srcScriptName)); UScriptCode targScript = static_cast<UScriptCode>(u_getPropertyValueEnum(UCHAR_SCRIPT, targScriptName)); if (U_FAILURE(status)) { goto cleanup; } if (srcScript == USCRIPT_INVALID_CODE || targScript == USCRIPT_INVALID_CODE) { status = U_INVALID_FORMAT_ERROR; goto cleanup; } // select the table - (A) any case or (L) lower case only UTrie2 *table = anyCaseTrie; if (uregex_start(parseRegexp, 7, &status) >= 0) { table = lowerCaseTrie; } // Build the set of scripts containing confusable characters for // the code point(s) specified in this input line. // Sanity check that the script of the source code point is the same // as the source script indicated in the input file. Failure of this check is // an error in the input file. // Include the source script in the set (needed for Mixed Script Confusable detection). // UChar32 cp; for (cp=startCodePoint; cp<=endCodePoint; cp++) { int32_t setIndex = utrie2_get32(table, cp); BuilderScriptSet *bsset = NULL; if (setIndex > 0) { U_ASSERT(setIndex < scriptSets->size()); bsset = static_cast<BuilderScriptSet *>(scriptSets->elementAt(setIndex)); } else { bsset = new BuilderScriptSet(); if (bsset == NULL) { status = U_MEMORY_ALLOCATION_ERROR; goto cleanup; } bsset->codePoint = cp; bsset->trie = table; bsset->sset = new ScriptSet(); setIndex = scriptSets->size(); bsset->index = setIndex; bsset->rindex = 0; if (bsset->sset == NULL) { status = U_MEMORY_ALLOCATION_ERROR; goto cleanup; } scriptSets->addElement(bsset, status); utrie2_set32(table, cp, setIndex, &status); } bsset->sset->Union(targScript); bsset->sset->Union(srcScript); if (U_FAILURE(status)) { goto cleanup; } UScriptCode cpScript = uscript_getScript(cp, &status); if (cpScript != srcScript) { status = U_INVALID_FORMAT_ERROR; goto cleanup; } } } // Eliminate duplicate script sets. At this point we have a separate // script set for every code point that had data in the input file. // // We eliminate underlying ScriptSet objects, not the BuildScriptSets that wrap them // // printf("Number of scriptSets: %d\n", scriptSets->size()); { int32_t duplicateCount = 0; rtScriptSetsCount = 2; for (int32_t outeri=2; outeri<scriptSets->size(); outeri++) { BuilderScriptSet *outerSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(outeri)); if (outerSet->index != static_cast<uint32_t>(outeri)) { // This set was already identified as a duplicate. // It will not be allocated a position in the runtime array of ScriptSets. continue; } outerSet->rindex = rtScriptSetsCount++; for (int32_t inneri=outeri+1; inneri<scriptSets->size(); inneri++) { BuilderScriptSet *innerSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(inneri)); if (*(outerSet->sset) == *(innerSet->sset) && outerSet->sset != innerSet->sset) { delete innerSet->sset; innerSet->scriptSetOwned = FALSE; innerSet->sset = outerSet->sset; innerSet->index = outeri; innerSet->rindex = outerSet->rindex; duplicateCount++; } // But this doesn't get all. We need to fix the TRIE. } } // printf("Number of distinct script sets: %d\n", rtScriptSetsCount); } // Update the Trie values to be reflect the run time script indexes (after duplicate merging). // (Trie Values 0 and 1 are reserved, and the corresponding slots in scriptSets // are unused, which is why the loop index starts at 2.) { for (int32_t i=2; i<scriptSets->size(); i++) { BuilderScriptSet *bSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(i)); if (bSet->rindex != (uint32_t)i) { utrie2_set32(bSet->trie, bSet->codePoint, bSet->rindex, &status); } } } // For code points with script==Common or script==Inherited, // Set the reserved value of 1 into both Tries. These characters do not participate // in Whole Script Confusable detection; this reserved value is the means // by which they are detected. { UnicodeSet ignoreSet; ignoreSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_COMMON, status); UnicodeSet inheritedSet; inheritedSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_INHERITED, status); ignoreSet.addAll(inheritedSet); for (int32_t rn=0; rn<ignoreSet.getRangeCount(); rn++) { UChar32 rangeStart = ignoreSet.getRangeStart(rn); UChar32 rangeEnd = ignoreSet.getRangeEnd(rn); utrie2_setRange32(anyCaseTrie, rangeStart, rangeEnd, 1, TRUE, &status); utrie2_setRange32(lowerCaseTrie, rangeStart, rangeEnd, 1, TRUE, &status); } } // Serialize the data to the Spoof Detector { utrie2_freeze(anyCaseTrie, UTRIE2_16_VALUE_BITS, &status); int32_t size = utrie2_serialize(anyCaseTrie, NULL, 0, &status); // printf("Any case Trie size: %d\n", size); if (status != U_BUFFER_OVERFLOW_ERROR) { goto cleanup; } status = U_ZERO_ERROR; spImpl->fSpoofData->fRawData->fAnyCaseTrie = spImpl->fSpoofData->fMemLimit; spImpl->fSpoofData->fRawData->fAnyCaseTrieLength = size; spImpl->fSpoofData->fAnyCaseTrie = anyCaseTrie; void *where = spImpl->fSpoofData->reserveSpace(size, status); utrie2_serialize(anyCaseTrie, where, size, &status); utrie2_freeze(lowerCaseTrie, UTRIE2_16_VALUE_BITS, &status); size = utrie2_serialize(lowerCaseTrie, NULL, 0, &status); // printf("Lower case Trie size: %d\n", size); if (status != U_BUFFER_OVERFLOW_ERROR) { goto cleanup; } status = U_ZERO_ERROR; spImpl->fSpoofData->fRawData->fLowerCaseTrie = spImpl->fSpoofData->fMemLimit; spImpl->fSpoofData->fRawData->fLowerCaseTrieLength = size; spImpl->fSpoofData->fLowerCaseTrie = lowerCaseTrie; where = spImpl->fSpoofData->reserveSpace(size, status); utrie2_serialize(lowerCaseTrie, where, size, &status); spImpl->fSpoofData->fRawData->fScriptSets = spImpl->fSpoofData->fMemLimit; spImpl->fSpoofData->fRawData->fScriptSetsLength = rtScriptSetsCount; ScriptSet *rtScriptSets = static_cast<ScriptSet *> (spImpl->fSpoofData->reserveSpace(rtScriptSetsCount * sizeof(ScriptSet), status)); uint32_t rindex = 2; for (int32_t i=2; i<scriptSets->size(); i++) { BuilderScriptSet *bSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(i)); if (bSet->rindex < rindex) { // We have already copied this script set to the serialized data. continue; } U_ASSERT(rindex == bSet->rindex); rtScriptSets[rindex] = *bSet->sset; // Assignment of a ScriptSet just copies the bits. rindex++; } } // Open new utrie2s from the serialized data. We don't want to keep the ones // we just built because we would then have two copies of the data, one internal to // the utries that we have already constructed, and one in the serialized data area. // An alternative would be to not pre-serialize the Trie data, but that makes the // spoof detector data different, depending on how the detector was constructed. // It's simpler to keep the data always the same. spImpl->fSpoofData->fAnyCaseTrie = utrie2_openFromSerialized( UTRIE2_16_VALUE_BITS, (const char *)spImpl->fSpoofData->fRawData + spImpl->fSpoofData->fRawData->fAnyCaseTrie, spImpl->fSpoofData->fRawData->fAnyCaseTrieLength, NULL, &status); spImpl->fSpoofData->fLowerCaseTrie = utrie2_openFromSerialized( UTRIE2_16_VALUE_BITS, (const char *)spImpl->fSpoofData->fRawData + spImpl->fSpoofData->fRawData->fLowerCaseTrie, spImpl->fSpoofData->fRawData->fAnyCaseTrieLength, NULL, &status); cleanup: if (U_FAILURE(status)) { pe->line = lineNum; } uregex_close(parseRegexp); uprv_free(input); int32_t i; for (i=0; i<scriptSets->size(); i++) { BuilderScriptSet *bsset = static_cast<BuilderScriptSet *>(scriptSets->elementAt(i)); delete bsset; } delete scriptSets; utrie2_close(anyCaseTrie); utrie2_close(lowerCaseTrie); return; }
/** * Convert the elements of the 'list' vector, which are SingleID * objects, into actual Transliterator objects. In the course of * this, some (or all) entries may be removed. If all entries * are removed, the NULL transliterator will be added. * * Delete entries with empty basicIDs; these are generated by * elements like "(A)" in the forward direction, or "A()" in * the reverse. THIS MAY RESULT IN AN EMPTY VECTOR. Convert * SingleID entries to actual transliterators. * * @param list vector of SingleID objects. On exit, vector * of one or more Transliterators. * @return new value of insertIndex. The index will shift if * there are empty items, like "(Lower)", with indices less than * insertIndex. */ void TransliteratorIDParser::instantiateList(UVector & list, UErrorCode & ec) { UVector tlist(ec); if (U_FAILURE(ec)) { goto RETURN; } tlist.setDeleter(_deleteTransliteratorTrIDPars); Transliterator * t; int32_t i; for (i = 0; i <= list.size(); ++i) // [sic]: i<=list.size() { // We run the loop too long by one, so we can // do an insert after the last element if (i == list.size()) { break; } SingleID * single = (SingleID *) list.elementAt(i); if (single->basicID.length() != 0) { t = single->createInstance(); if (t == NULL) { ec = U_INVALID_ID; goto RETURN; } tlist.addElement(t, ec); if (U_FAILURE(ec)) { delete t; goto RETURN; } } } // An empty list is equivalent to a NULL transliterator. if (tlist.size() == 0) { t = createBasicInstance(ANY_NULL, NULL); if (t == NULL) { // Should never happen ec = U_INTERNAL_TRANSLITERATOR_ERROR; } tlist.addElement(t, ec); if (U_FAILURE(ec)) { delete t; } } RETURN: UObjectDeleter * save = list.setDeleter(_deleteSingleID); list.removeAllElements(); if (U_SUCCESS(ec)) { list.setDeleter(_deleteTransliteratorTrIDPars); while (tlist.size() > 0) { t = (Transliterator *) tlist.orphanElementAt(0); list.addElement(t, ec); if (U_FAILURE(ec)) { delete t; list.removeAllElements(); break; } } } list.setDeleter(save); }
/** * Given an Entry object, instantiate it. Caller owns result. Return * 0 on failure. * * Return a non-empty aliasReturn value if the ID points to an alias. * We cannot instantiate it ourselves because the alias may contain * filters or compounds, which we do not understand. Caller should * make aliasReturn empty before calling. * * The entry object is assumed to reside in the dynamic store. It may be * modified. */ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID, TransliteratorEntry *entry, TransliteratorAlias* &aliasReturn, UErrorCode& status) { Transliterator *t = 0; U_ASSERT(aliasReturn == 0); switch (entry->entryType) { case TransliteratorEntry::RBT_DATA: t = new RuleBasedTransliterator(ID, entry->u.data); if (t == 0) { status = U_MEMORY_ALLOCATION_ERROR; } return t; case TransliteratorEntry::PROTOTYPE: t = entry->u.prototype->clone(); if (t == 0) { status = U_MEMORY_ALLOCATION_ERROR; } return t; case TransliteratorEntry::ALIAS: aliasReturn = new TransliteratorAlias(entry->stringArg, entry->compoundFilter); if (aliasReturn == 0) { status = U_MEMORY_ALLOCATION_ERROR; } return 0; case TransliteratorEntry::FACTORY: t = entry->u.factory.function(ID, entry->u.factory.context); if (t == 0) { status = U_MEMORY_ALLOCATION_ERROR; } return t; case TransliteratorEntry::COMPOUND_RBT: { UVector* rbts = new UVector(entry->u.dataVector->size(), status); // Check for null pointer if (rbts == NULL) { status = U_MEMORY_ALLOCATION_ERROR; return NULL; } int32_t passNumber = 1; for (int32_t i = 0; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) { // TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")? Transliterator* t = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++), (TransliterationRuleData*)(entry->u.dataVector->elementAt(i)), FALSE); if (t == 0) status = U_MEMORY_ALLOCATION_ERROR; else rbts->addElement(t, status); } if (U_FAILURE(status)) { delete rbts; return 0; } aliasReturn = new TransliteratorAlias(ID, entry->stringArg, rbts, entry->compoundFilter); } if (aliasReturn == 0) { status = U_MEMORY_ALLOCATION_ERROR; } return 0; case TransliteratorEntry::LOCALE_RULES: aliasReturn = new TransliteratorAlias(ID, entry->stringArg, (UTransDirection) entry->intArg); if (aliasReturn == 0) { status = U_MEMORY_ALLOCATION_ERROR; } return 0; case TransliteratorEntry::RULES_FORWARD: case TransliteratorEntry::RULES_REVERSE: // Process the rule data into a TransliteratorRuleData object, // and possibly also into an ::id header and/or footer. Then // we modify the registry with the parsed data and retry. { TransliteratorParser parser(status); // We use the file name, taken from another resource bundle // 2-d array at static init time, as a locale language. We're // just using the locale mechanism to map through to a file // name; this in no way represents an actual locale. //CharString ch(entry->stringArg); //UResourceBundle *bundle = ures_openDirect(0, ch, &status); UnicodeString rules = entry->stringArg; //ures_close(bundle); //if (U_FAILURE(status)) { // We have a failure of some kind. Remove the ID from the // registry so we don't keep trying. NOTE: This will throw off // anyone who is, at the moment, trying to iterate over the // available IDs. That's acceptable since we should never // really get here except under installation, configuration, // or unrecoverable run time memory failures. // remove(ID); //} else { // If the status indicates a failure, then we don't have any // rules -- there is probably an installation error. The list // in the root locale should correspond to all the installed // transliterators; if it lists something that's not // installed, we'll get an error from ResourceBundle. aliasReturn = new TransliteratorAlias(ID, rules, ((entry->entryType == TransliteratorEntry::RULES_REVERSE) ? UTRANS_REVERSE : UTRANS_FORWARD)); if (aliasReturn == 0) { status = U_MEMORY_ALLOCATION_ERROR; } //} } return 0; default: U_ASSERT(FALSE); // can't get here return 0; } }
void ZoneMeta::initAvailableMetaZoneIDs () { UBool initialized; UMTX_CHECK(&gZoneMetaLock, gMetaZoneIDsInitialized, initialized); if (!initialized) { umtx_lock(&gZoneMetaLock); { if (!gMetaZoneIDsInitialized) { UErrorCode status = U_ZERO_ERROR; UHashtable *metaZoneIDTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status); uhash_setKeyDeleter(metaZoneIDTable, uprv_deleteUObject); // No valueDeleter, because the vector maintain the value objects UVector *metaZoneIDs = NULL; if (U_SUCCESS(status)) { metaZoneIDs = new UVector(NULL, uhash_compareUChars, status); if (metaZoneIDs == NULL) { status = U_MEMORY_ALLOCATION_ERROR; } } else { uhash_close(metaZoneIDTable); } if (U_SUCCESS(status)) { U_ASSERT(metaZoneIDs != NULL); metaZoneIDs->setDeleter(uprv_free); UResourceBundle *rb = ures_openDirect(NULL, gMetaZones, &status); UResourceBundle *bundle = ures_getByKey(rb, gMapTimezonesTag, NULL, &status); UResourceBundle res; ures_initStackObject(&res); while (U_SUCCESS(status) && ures_hasNext(bundle)) { ures_getNextResource(bundle, &res, &status); if (U_FAILURE(status)) { break; } const char *mzID = ures_getKey(&res); int32_t len = uprv_strlen(mzID); UChar *uMzID = (UChar*)uprv_malloc(sizeof(UChar) * (len + 1)); if (uMzID == NULL) { status = U_MEMORY_ALLOCATION_ERROR; break; } u_charsToUChars(mzID, uMzID, len); uMzID[len] = 0; UnicodeString *usMzID = new UnicodeString(uMzID); if (uhash_get(metaZoneIDTable, usMzID) == NULL) { metaZoneIDs->addElement((void *)uMzID, status); uhash_put(metaZoneIDTable, (void *)usMzID, (void *)uMzID, &status); } else { uprv_free(uMzID); delete usMzID; } } if (U_SUCCESS(status)) { gMetaZoneIDs = metaZoneIDs; gMetaZoneIDTable = metaZoneIDTable; gMetaZoneIDsInitialized = TRUE; } else { uhash_close(metaZoneIDTable); delete metaZoneIDs; } ures_close(&res); ures_close(bundle); ures_close(rb); } } } umtx_unlock(&gZoneMetaLock); } }
/* * Initializes the region data from the ICU resource bundles. The region data * contains the basic relationships such as which regions are known, what the numeric * codes are, any known aliases, and the territory containment data. * * If the region data has already loaded, then this method simply returns without doing * anything meaningful. */ void Region::loadRegionData() { if (regionDataIsLoaded) { return; } umtx_lock(&gRegionDataLock); if (regionDataIsLoaded) { // In case another thread gets to it before we do... umtx_unlock(&gRegionDataLock); return; } UErrorCode status = U_ZERO_ERROR; UResourceBundle* regionCodes = NULL; UResourceBundle* territoryAlias = NULL; UResourceBundle* codeMappings = NULL; UResourceBundle* worldContainment = NULL; UResourceBundle* territoryContainment = NULL; UResourceBundle* groupingContainment = NULL; DecimalFormat *df = new DecimalFormat(status); df->setParseIntegerOnly(TRUE); regionIDMap = uhash_open(uhash_hashUnicodeString,uhash_compareUnicodeString,NULL,&status); uhash_setValueDeleter(regionIDMap, deleteRegion); numericCodeMap = uhash_open(uhash_hashLong,uhash_compareLong,NULL,&status); regionAliases = uhash_open(uhash_hashUnicodeString,uhash_compareUnicodeString,NULL,&status); uhash_setKeyDeleter(regionAliases,uprv_deleteUObject); UResourceBundle *rb = ures_openDirect(NULL,"metadata",&status); regionCodes = ures_getByKey(rb,"regionCodes",NULL,&status); territoryAlias = ures_getByKey(rb,"territoryAlias",NULL,&status); UResourceBundle *rb2 = ures_openDirect(NULL,"supplementalData",&status); codeMappings = ures_getByKey(rb2,"codeMappings",NULL,&status); territoryContainment = ures_getByKey(rb2,"territoryContainment",NULL,&status); worldContainment = ures_getByKey(territoryContainment,"001",NULL,&status); groupingContainment = ures_getByKey(territoryContainment,"grouping",NULL,&status); UVector *continents = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); while ( ures_hasNext(worldContainment) ) { UnicodeString *continentName = new UnicodeString(ures_getNextUnicodeString(worldContainment,NULL,&status)); continents->addElement(continentName,status); } UVector *groupings = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); while ( ures_hasNext(groupingContainment) ) { UnicodeString *groupingName = new UnicodeString(ures_getNextUnicodeString(groupingContainment,NULL,&status)); groupings->addElement(groupingName,status); } while ( ures_hasNext(regionCodes) ) { UnicodeString regionID = ures_getNextUnicodeString(regionCodes,NULL,&status); Region *r = new Region(); r->idStr = regionID; r->idStr.extract(0,r->idStr.length(),r->id,sizeof(r->id),US_INV); r->type = URGN_TERRITORY; // Only temporary - figure out the real type later once the aliases are known. uhash_put(regionIDMap,(void *)&(r->idStr),(void *)r,&status); Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(r->idStr,result,ps); if ( U_SUCCESS(ps) ) { r->code = result.getLong(); // Convert string to number uhash_iput(numericCodeMap,r->code,(void *)r,&status); r->type = URGN_SUBCONTINENT; } else { r->code = Region::UNDEFINED_NUMERIC_CODE; } } // Process the territory aliases while ( ures_hasNext(territoryAlias) ) { UResourceBundle *res = ures_getNextResource(territoryAlias,NULL,&status); const char *aliasFrom = ures_getKey(res); UnicodeString* aliasFromStr = new UnicodeString(aliasFrom); UnicodeString aliasTo = ures_getUnicodeString(res,&status); ures_close(res); Region *aliasToRegion = (Region *) uhash_get(regionIDMap,&aliasTo); Region *aliasFromRegion = (Region *)uhash_get(regionIDMap,aliasFromStr); if ( aliasToRegion != NULL && aliasFromRegion == NULL ) { // This is just an alias from some string to a region uhash_put(regionAliases,(void *)aliasFromStr, (void *)aliasToRegion,&status); } else { if ( aliasFromRegion == NULL ) { // Deprecated region code not in the master codes list - so need to create a deprecated region for it. aliasFromRegion = new Region(); aliasFromRegion->idStr.setTo(*aliasFromStr); aliasFromRegion->idStr.extract(0,aliasFromRegion->idStr.length(),aliasFromRegion->id,sizeof(aliasFromRegion->id),US_INV); uhash_put(regionIDMap,(void *)&(aliasFromRegion->idStr),(void *)aliasFromRegion,&status); Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(aliasFromRegion->idStr,result,ps); if ( U_SUCCESS(ps) ) { aliasFromRegion->code = result.getLong(); // Convert string to number uhash_iput(numericCodeMap,aliasFromRegion->code,(void *)aliasFromRegion,&status); } else { aliasFromRegion->code = Region::UNDEFINED_NUMERIC_CODE; } aliasFromRegion->type = URGN_DEPRECATED; } else { aliasFromRegion->type = URGN_DEPRECATED; } delete aliasFromStr; aliasFromRegion->preferredValues = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); UnicodeString currentRegion; currentRegion.remove(); for (int32_t i = 0 ; i < aliasTo.length() ; i++ ) { if ( aliasTo.charAt(i) != 0x0020 ) { currentRegion.append(aliasTo.charAt(i)); } if ( aliasTo.charAt(i) == 0x0020 || i+1 == aliasTo.length() ) { Region *target = (Region *)uhash_get(regionIDMap,(void *)¤tRegion); if (target) { UnicodeString *preferredValue = new UnicodeString(target->idStr); aliasFromRegion->preferredValues->addElement((void *)preferredValue,status); } currentRegion.remove(); } } } } // Process the code mappings - This will allow us to assign numeric codes to most of the territories. while ( ures_hasNext(codeMappings) ) { UResourceBundle *mapping = ures_getNextResource(codeMappings,NULL,&status); if ( ures_getType(mapping) == URES_ARRAY && ures_getSize(mapping) == 3) { UnicodeString codeMappingID = ures_getUnicodeStringByIndex(mapping,0,&status); UnicodeString codeMappingNumber = ures_getUnicodeStringByIndex(mapping,1,&status); UnicodeString codeMapping3Letter = ures_getUnicodeStringByIndex(mapping,2,&status); Region *r = (Region *)uhash_get(regionIDMap,(void *)&codeMappingID); if ( r ) { Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(codeMappingNumber,result,ps); if ( U_SUCCESS(ps) ) { r->code = result.getLong(); // Convert string to number uhash_iput(numericCodeMap,r->code,(void *)r,&status); } UnicodeString *code3 = new UnicodeString(codeMapping3Letter); uhash_put(regionAliases,(void *)code3, (void *)r,&status); } } ures_close(mapping); } // Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS Region *r; r = (Region *) uhash_get(regionIDMap,(void *)&WORLD_ID); if ( r ) { r->type = URGN_WORLD; } r = (Region *) uhash_get(regionIDMap,(void *)&UNKNOWN_REGION_ID); if ( r ) { r->type = URGN_UNKNOWN; } for ( int32_t i = 0 ; i < continents->size() ; i++ ) { r = (Region *) uhash_get(regionIDMap,(void *)continents->elementAt(i)); if ( r ) { r->type = URGN_CONTINENT; } } delete continents; for ( int32_t i = 0 ; i < groupings->size() ; i++ ) { r = (Region *) uhash_get(regionIDMap,(void *)groupings->elementAt(i)); if ( r ) { r->type = URGN_GROUPING; } } delete groupings; // Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR // even though it looks like a territory code. Need to handle it here. r = (Region *) uhash_get(regionIDMap,(void *)&OUTLYING_OCEANIA_REGION_ID); if ( r ) { r->type = URGN_SUBCONTINENT; } // Load territory containment info from the supplemental data. while ( ures_hasNext(territoryContainment) ) { UResourceBundle *mapping = ures_getNextResource(territoryContainment,NULL,&status); const char *parent = ures_getKey(mapping); UnicodeString parentStr = UnicodeString(parent); Region *parentRegion = (Region *) uhash_get(regionIDMap,(void *)&parentStr); for ( int j = 0 ; j < ures_getSize(mapping); j++ ) { UnicodeString child = ures_getUnicodeStringByIndex(mapping,j,&status); Region *childRegion = (Region *) uhash_get(regionIDMap,(void *)&child); if ( parentRegion != NULL && childRegion != NULL ) { // Add the child region to the set of regions contained by the parent if (parentRegion->containedRegions == NULL) { parentRegion->containedRegions = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); } UnicodeString *childStr = new UnicodeString(status); childStr->fastCopyFrom(childRegion->idStr); parentRegion->containedRegions->addElement((void *)childStr,status); // Set the parent region to be the containing region of the child. // Regions of type GROUPING can't be set as the parent, since another region // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent. if ( parentRegion->type != URGN_GROUPING) { childRegion->containingRegion = parentRegion; } } } ures_close(mapping); } // Create the availableRegions lists int32_t pos = -1; while ( const UHashElement* element = uhash_nextElement(regionIDMap,&pos)) { Region *ar = (Region *)element->value.pointer; if ( availableRegions[ar->type] == NULL ) { availableRegions[ar->type] = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); } UnicodeString *arString = new UnicodeString(ar->idStr); availableRegions[ar->type]->addElement((void *)arString,status); } ures_close(territoryContainment); ures_close(worldContainment); ures_close(groupingContainment); ures_close(codeMappings); ures_close(rb2); ures_close(territoryAlias); ures_close(regionCodes); ures_close(rb); delete df; ucln_i18n_registerCleanup(UCLN_I18N_REGION, region_cleanup); regionDataIsLoaded = true; umtx_unlock(&gRegionDataLock); }
/* * Creating Olson tzid to metazone mappings from resource (3.8.1 and beyond) */ UHashtable* ZoneMeta::createOlsonToMetaMap(void) { UErrorCode status = U_ZERO_ERROR; UHashtable *olsonToMeta = NULL; UResourceBundle *metazoneMappings = NULL; UResourceBundle *zoneItem = NULL; UResourceBundle *mz = NULL; StringEnumeration *tzids = NULL; olsonToMeta = uhash_open(uhash_hashUChars, uhash_compareUChars, NULL, &status); if (U_FAILURE(status)) { return NULL; } uhash_setKeyDeleter(olsonToMeta, deleteUCharString); uhash_setValueDeleter(olsonToMeta, deleteUVector); // Read metazone mappings from metazoneInfo bundle metazoneMappings = ures_openDirect(NULL, gMetazoneInfo, &status); metazoneMappings = ures_getByKey(metazoneMappings, gMetazoneMappings, metazoneMappings, &status); if (U_FAILURE(status)) { goto error_cleanup; } // Walk through all canonical tzids char zidkey[ZID_KEY_MAX]; tzids = TimeZone::createEnumeration(); const UnicodeString *tzid; while ((tzid = tzids->snext(status))) { if (U_FAILURE(status)) { goto error_cleanup; } // We may skip aliases, because the bundle // contains only canonical IDs. For now, try // all of them. tzid->extract(0, tzid->length(), zidkey, sizeof(zidkey), US_INV); zidkey[sizeof(zidkey)-1] = 0; // NULL terminate just in case. // Replace '/' with ':' UBool foundSep = FALSE; char *p = zidkey; while (*p) { if (*p == '/') { *p = ':'; foundSep = TRUE; } p++; } if (!foundSep) { // A valid time zone key has at least one separator continue; } zoneItem = ures_getByKey(metazoneMappings, zidkey, zoneItem, &status); if (U_FAILURE(status)) { status = U_ZERO_ERROR; continue; } UVector *mzMappings = NULL; while (ures_hasNext(zoneItem)) { mz = ures_getNextResource(zoneItem, mz, &status); const UChar *mz_name = ures_getStringByIndex(mz, 0, NULL, &status); const UChar *mz_from = ures_getStringByIndex(mz, 1, NULL, &status); const UChar *mz_to = ures_getStringByIndex(mz, 2, NULL, &status); if(U_FAILURE(status)){ status = U_ZERO_ERROR; continue; } // We do not want to use SimpleDateformat to parse boundary dates, // because this code could be triggered by the initialization code // used by SimpleDateFormat. UDate from = parseDate(mz_from, status); UDate to = parseDate(mz_to, status); if (U_FAILURE(status)) { status = U_ZERO_ERROR; continue; } OlsonToMetaMappingEntry *entry = (OlsonToMetaMappingEntry*)uprv_malloc(sizeof(OlsonToMetaMappingEntry)); if (entry == NULL) { status = U_MEMORY_ALLOCATION_ERROR; break; } entry->mzid = mz_name; entry->from = from; entry->to = to; if (mzMappings == NULL) { mzMappings = new UVector(deleteOlsonToMetaMappingEntry, NULL, status); if (U_FAILURE(status)) { delete mzMappings; deleteOlsonToMetaMappingEntry(entry); uprv_free(entry); break; } } mzMappings->addElement(entry, status); if (U_FAILURE(status)) { break; } } if (U_FAILURE(status)) { if (mzMappings != NULL) { delete mzMappings; } goto error_cleanup; } if (mzMappings != NULL) { // Add to hashtable int32_t tzidLen = tzid->length() + 1; // Add one for NUL terminator UChar *key = (UChar*)uprv_malloc(tzidLen * sizeof(UChar)); if (key == NULL) { status = U_MEMORY_ALLOCATION_ERROR; delete mzMappings; goto error_cleanup; } tzid->extract(key, tzidLen, status); uhash_put(olsonToMeta, key, mzMappings, &status); if (U_FAILURE(status)) { goto error_cleanup; } } } normal_cleanup: if (tzids != NULL) { delete tzids; } ures_close(zoneItem); ures_close(mz); ures_close(metazoneMappings); return olsonToMeta; error_cleanup: if (olsonToMeta != NULL) { uhash_close(olsonToMeta); olsonToMeta = NULL; } goto normal_cleanup; }