/**
 * Fills `result` with newly allocated copies of the visible service IDs.
 *
 * `result` is emptied on entry. The visible-ID map is walked while holding
 * `lock`. If createKey(matchID, status) yields a key, only IDs for which
 * that key's isFallbackOf() is true are copied. On any failure `status` is
 * set and `result` is emptied again before returning.
 *
 * @param result  receives the copied IDs
 * @param matchID passed to createKey() to build the optional filter key
 * @param status  in/out error code; nothing is collected if already failed
 * @return `result`
 */
UVector&
ICUService::getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorCode& status) const
{
    result.removeAllElements();

    if (U_FAILURE(status)) {
        return result;
    }

    {
        Mutex mutex(&lock);
        const Hashtable* visible = getVisibleIDMap(status);
        if (visible != NULL) {
            ICUServiceKey* filterKey = createKey(matchID, status);

            int32_t cursor = UHASH_FIRST;
            const UHashElement* entry;
            while ((entry = visible->nextElement(cursor)) != NULL) {
                const UnicodeString* visibleID = (const UnicodeString*)entry->key.pointer;
                if (filterKey != NULL && !filterKey->isFallbackOf(*visibleID)) {
                    continue;
                }

                UnicodeString* copy = new UnicodeString(*visibleID);
                if (copy == NULL || copy->isBogus()) {
                    delete copy;
                    status = U_MEMORY_ALLOCATION_ERROR;
                    break;
                }

                result.addElement(copy, status);
                if (U_FAILURE(status)) {
                    // the vector did not take the element; release it here
                    delete copy;
                    break;
                }
            }

            delete filterKey;
        }
    }

    if (U_FAILURE(status)) {
        result.removeAllElements();
    }
    return result;
}
/**
 * Normalizes `source` into `result` using the Normalizer2 instance for `mode`.
 *
 * If `source` is bogus or `status` already indicates failure, `result` is set
 * to bogus; when `status` was still a success code it becomes
 * U_ILLEGAL_ARGUMENT_ERROR. `source` and `result` may be the same object.
 * When `options` has UNORM_UNICODE_3_2 set, normalization goes through a
 * FilteredNormalizer2 built on uniset_getUnicode32Instance().
 */
void U_EXPORT2
Normalizer::normalize(const UnicodeString & source,
                      UNormalizationMode mode, int32_t options,
                      UnicodeString & result,
                      UErrorCode & status) {
    if (source.isBogus() || U_FAILURE(status)) {
        result.setToBogus();
        if (U_SUCCESS(status)) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
        return;
    }

    // When input and output are the same object, normalize into a scratch
    // string and copy it back only on success.
    const UBool aliased = (&source == &result);
    UnicodeString scratch;
    UnicodeString &target = aliased ? scratch : result;

    const Normalizer2 *norm2 = Normalizer2Factory::getInstance(mode, status);
    if (U_SUCCESS(status)) {
        if ((options & UNORM_UNICODE_3_2) != 0) {
            FilteredNormalizer2(*norm2, *uniset_getUnicode32Instance(status)).
                normalize(source, target, status);
        } else {
            norm2->normalize(source, target, status);
        }
    }

    if (aliased && U_SUCCESS(status)) {
        result = scratch;
    }
}
/**
 * Looks up the time zone ID that the metazone resource data maps to
 * `mzid` for `region`.
 *
 * The region-specific entry is tried only when `region` is 2 or 3 code units
 * long; if that yields nothing, the entry keyed by gWorldTag is used.
 *
 * @param mzid   metazone ID; bogus or longer than ZID_KEY_MAX yields bogus
 * @param region region code used as the resource key
 * @param result receives the time zone ID, or is set to bogus if none found
 * @return `result`
 */
UnicodeString& U_EXPORT2
ZoneMeta::getZoneIdByMetazone(const UnicodeString &mzid, const UnicodeString &region, UnicodeString &result) {
    UErrorCode status = U_ZERO_ERROR;
    const UChar *tzid = NULL;
    int32_t tzidLen = 0;
    char keyBuf[ZID_KEY_MAX + 1];
    int32_t keyLen = 0;

    if (mzid.isBogus() || mzid.length() > ZID_KEY_MAX) {
        result.setToBogus();
        return result;
    }

    keyLen = mzid.extract(0, mzid.length(), keyBuf, ZID_KEY_MAX + 1, US_INV);
    keyBuf[keyLen] = 0;

    // Drill down to the per-metazone table, reusing rb as the fill-in bundle.
    UResourceBundle *rb = ures_openDirect(NULL, gMetaZones, &status);
    ures_getByKey(rb, gMapTimezonesTag, rb, &status);
    ures_getByKey(rb, keyBuf, rb, &status);

    if (U_SUCCESS(status)) {
        // check region mapping
        if (region.length() == 2 || region.length() == 3) {
            keyLen = region.extract(0, region.length(), keyBuf, ZID_KEY_MAX + 1, US_INV);
            keyBuf[keyLen] = 0;
            tzid = ures_getStringByKey(rb, keyBuf, &tzidLen, &status);
            if (status == U_MISSING_RESOURCE_ERROR) {
                // no region-specific entry is not an error; fall back below
                status = U_ZERO_ERROR;
            }
        }
        if (U_SUCCESS(status) && tzid == NULL) {
            // try "001"
            tzid = ures_getStringByKey(rb, gWorldTag, &tzidLen, &status);
        }
    }
    ures_close(rb);

    if (tzid == NULL) {
        result.setToBogus();
    } else {
        result.setTo(tzid, tzidLen);
    }

    return result;
}
/**
 * Builds a spec from `theSpec`, canonicalizing `top`.
 *
 * If `theSpec` names a non-bogus locale, a transliterator ResourceBundle is
 * opened for it and kept in `res` unless opening failed or fell back to the
 * default (U_USING_DEFAULT_WARNING). Independently, `theSpec` is run through
 * uscript_getCode() and, on a hit, `scriptName` is set to the script's name.
 * `top` then becomes the locale's name (when `res` is kept) or the script
 * name (when one was found); otherwise it stays `theSpec`.
 */
TransliteratorSpec::TransliteratorSpec(const UnicodeString& theSpec)
: top(theSpec),
  res(0)
{
    UErrorCode ec = U_ZERO_ERROR;
    Locale specLocale("");
    LocaleUtility::initLocaleFromName(theSpec, specLocale);
    if (!specLocale.isBogus()) {
        res = new ResourceBundle(U_ICUDATA_TRANSLIT, specLocale, ec);
        if (res == 0) {
            // Allocation failed: bail out; reset() is not reached on this path.
            return;
        }
        if (U_FAILURE(ec) || ec == U_USING_DEFAULT_WARNING) {
            delete res;
            res = 0;
        }
    }

    // Canonicalize script name -or- do locale->script mapping
    ec = U_ZERO_ERROR;
    static const int32_t capacity = 10;
    UScriptCode codes[capacity] = {USCRIPT_INVALID_CODE};
    CharString specChars;
    specChars.appendInvariantChars(theSpec, ec);
    const int32_t found = uscript_getCode(specChars.data(), codes, capacity, &ec);
    if (found > 0 && codes[0] != USCRIPT_INVALID_CODE) {
        scriptName = UnicodeString(uscript_getName(codes[0]), -1, US_INV);
    }

    // Canonicalize top
    if (res != 0) {
        // We are a locale; use the name derived from the Locale object
        UnicodeString canonicalLocale;
        LocaleUtility::initNameFromLocale(specLocale, canonicalLocale);
        if (!canonicalLocale.isBogus()) {
            top = canonicalLocale;
        }
    } else if (scriptName.length() != 0) {
        // We are a script; use canonical name
        top = scriptName;
    }

    // assert(spec != top);
    reset();
}
// called only by factories, treat as private UObject* ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUServiceFactory* factory, UErrorCode& status) const { if (U_FAILURE(status)) { return NULL; } if (isDefault()) { return handleDefault(key, actualReturn, status); } ICUService* ncthis = (ICUService*)this; // cast away semantic const CacheEntry* result = NULL; { // The factory list can't be modified until we're done, // otherwise we might update the cache with an invalid result. // The cache has to stay in synch with the factory list. // ICU doesn't have monitors so we can't use rw locks, so // we single-thread everything using this service, for now. // if factory is not null, we're calling from within the mutex, // and since some unix machines don't have reentrant mutexes we // need to make sure not to try to lock it again. XMutex mutex(&lock, factory != NULL); if (serviceCache == NULL) { ncthis->serviceCache = new Hashtable(status); if (ncthis->serviceCache == NULL) { return NULL; } if (U_FAILURE(status)) { delete serviceCache; return NULL; } serviceCache->setValueDeleter(cacheDeleter); } UnicodeString currentDescriptor; UVectorDeleter cacheDescriptorList; UBool putInCache = FALSE; int32_t startIndex = 0; int32_t limit = factories->size(); UBool cacheResult = TRUE; if (factory != NULL) { for (int32_t i = 0; i < limit; ++i) { if (factory == (const ICUServiceFactory*)factories->elementAt(i)) { startIndex = i + 1; break; } } if (startIndex == 0) { // throw new InternalError("Factory " + factory + "not registered with service: " + this); status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; } cacheResult = FALSE; } do { currentDescriptor.remove(); key.currentDescriptor(currentDescriptor); result = (CacheEntry*)serviceCache->get(currentDescriptor); if (result != NULL) { break; } // first test of cache failed, so we'll have to update // the cache if we eventually succeed-- that is, if we're // going to update the cache at all. 
putInCache = TRUE; int32_t index = startIndex; while (index < limit) { ICUServiceFactory* f = (ICUServiceFactory*)factories->elementAt(index++); UObject* service = f->create(key, this, status); if (U_FAILURE(status)) { delete service; return NULL; } if (service != NULL) { result = new CacheEntry(currentDescriptor, service); if (result == NULL) { delete service; status = U_MEMORY_ALLOCATION_ERROR; return NULL; } goto outerEnd; } } // prepare to load the cache with all additional ids that // will resolve to result, assuming we'll succeed. We // don't want to keep querying on an id that's going to // fallback to the one that succeeded, we want to hit the // cache the first time next goaround. if (cacheDescriptorList._obj == NULL) { cacheDescriptorList._obj = new UVector(uprv_deleteUObject, NULL, 5, status); if (U_FAILURE(status)) { return NULL; } } UnicodeString* idToCache = new UnicodeString(currentDescriptor); if (idToCache == NULL || idToCache->isBogus()) { status = U_MEMORY_ALLOCATION_ERROR; return NULL; } cacheDescriptorList._obj->addElement(idToCache, status); if (U_FAILURE(status)) { return NULL; } } while (key.fallback()); outerEnd: if (result != NULL) { if (putInCache && cacheResult) { serviceCache->put(result->actualDescriptor, result, status); if (U_FAILURE(status)) { delete result; return NULL; } if (cacheDescriptorList._obj != NULL) { for (int32_t i = cacheDescriptorList._obj->size(); --i >= 0;) { UnicodeString* desc = (UnicodeString*)cacheDescriptorList._obj->elementAt(i); serviceCache->put(*desc, result, status); if (U_FAILURE(status)) { delete result; return NULL; } result->ref(); cacheDescriptorList._obj->removeElementAt(i); } } } if (actualReturn != NULL) { // strip null prefix if (result->actualDescriptor.indexOf((UChar)0x2f) == 0) { // U+002f=slash (/) actualReturn->remove(); actualReturn->append(result->actualDescriptor, 1, result->actualDescriptor.length() - 1); } else { *actualReturn = result->actualDescriptor; } if (actualReturn->isBogus()) { 
status = U_MEMORY_ALLOCATION_ERROR; delete result; return NULL; } } UObject* service = cloneInstance(result->service); if (putInCache && !cacheResult) { delete result; } return service; } } return handleDefault(key, actualReturn, status); }
/**
 * Shared body for creating an IntlGregorianCalendar.
 *
 * The variant is the argument count with trailing NULLs ignored:
 *   - 0 to 2 arguments: (timezone, locale)
 *   - 3, 5 or 6 arguments: (year, month, day[, hour, minute[, second]]),
 *     using PHP's default time zone
 * A count of 4 is rejected. On any error the intl error is set and the
 * object slot of return_value is set to NULL; on success the new
 * GregorianCalendar is stored in the Calendar_object of return_value.
 */
static void _php_intlgregcal_constructor_body(INTERNAL_FUNCTION_PARAMETERS)
{
    zval        *tz_object = NULL;
    zval        args_a[6] = {0},
                *args = &args_a[0];
    char        *locale = NULL;
    // "s" writes the length through a size_t*; an int here is too small on
    // LP64 targets and lets the parser write past the variable.
    size_t      locale_len;
    zend_long   largs[6];
    UErrorCode  status = U_ZERO_ERROR;
    int         variant;
    intl_error_reset(NULL TSRMLS_CC);

    // parameter number validation / variant determination
    if (ZEND_NUM_ARGS() > 6 ||
            zend_get_parameters_array_ex(ZEND_NUM_ARGS(), args) == FAILURE) {
        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
            "intlgregcal_create_instance: too many arguments", 0 TSRMLS_CC);
        Z_OBJ_P(return_value) = NULL;
        return;
    }
    // ignore trailing NULL arguments when picking the variant
    for (variant = ZEND_NUM_ARGS();
            variant > 0 && Z_TYPE(args[variant - 1]) == IS_NULL;
            variant--) {}
    if (variant == 4) {
        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
            "intlgregcal_create_instance: no variant with 4 arguments "
            "(excluding trailing NULLs)", 0 TSRMLS_CC);
        Z_OBJ_P(return_value) = NULL;
        return;
    }

    // argument parsing
    if (variant <= 2) {
        if (zend_parse_parameters(MIN(ZEND_NUM_ARGS(), 2) TSRMLS_CC,
                "|z!s!", &tz_object, &locale, &locale_len) == FAILURE) {
            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
                "intlgregcal_create_instance: bad arguments", 0 TSRMLS_CC);
            Z_OBJ_P(return_value) = NULL;
            return;
        }
    }
    if (variant > 2 && zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC,
            "lll|lll", &largs[0], &largs[1], &largs[2], &largs[3], &largs[4],
            &largs[5]) == FAILURE) {
        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
            "intlgregcal_create_instance: bad arguments", 0 TSRMLS_CC);
        Z_OBJ_P(return_value) = NULL;
        return;
    }

    // instantiation of ICU object
    GregorianCalendar *gcal = NULL;

    if (variant <= 2) {
        // From timezone and locale (0 to 2 arguments)
        TimeZone *tz = timezone_process_timezone_argument(tz_object, NULL,
            "intlgregcal_create_instance" TSRMLS_CC);
        if (tz == NULL) {
            Z_OBJ_P(return_value) = NULL;
            return;
        }
        if (!locale) {
            locale = const_cast<char*>(intl_locale_get_default(TSRMLS_C));
        }

        gcal = new GregorianCalendar(tz, Locale::createFromName(locale),
            status);
        if (U_FAILURE(status)) {
            intl_error_set(NULL, status, "intlgregcal_create_instance: error "
                "creating ICU GregorianCalendar from time zone and locale", 0 TSRMLS_CC);
            delete gcal;
            // NOTE(review): the calendar constructor is documented as adopting
            // its TimeZone argument; confirm tz is not already released on
            // this failure path before deleting it here.
            delete tz;
            Z_OBJ_P(return_value) = NULL;
            return;
        }
    } else {
        // From date/time (3, 5 or 6 arguments)
        for (int i = 0; i < variant; i++) {
            if (largs[i] < INT32_MIN || largs[i] > INT32_MAX) {
                intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
                    "intlgregcal_create_instance: at least one of the arguments"
                    " has an absolute value that is too large", 0 TSRMLS_CC);
                Z_OBJ_P(return_value) = NULL;
                return;
            }
        }

        if (variant == 3) {
            gcal = new GregorianCalendar((int32_t)largs[0], (int32_t)largs[1],
                (int32_t)largs[2], status);
        } else if (variant == 5) {
            gcal = new GregorianCalendar((int32_t)largs[0], (int32_t)largs[1],
                (int32_t)largs[2], (int32_t)largs[3], (int32_t)largs[4], status);
        } else if (variant == 6) {
            gcal = new GregorianCalendar((int32_t)largs[0], (int32_t)largs[1],
                (int32_t)largs[2], (int32_t)largs[3], (int32_t)largs[4], (int32_t)largs[5],
                status);
        }
        if (U_FAILURE(status)) {
            intl_error_set(NULL, status, "intlgregcal_create_instance: error "
                "creating ICU GregorianCalendar from date", 0 TSRMLS_CC);
            delete gcal;
            Z_OBJ_P(return_value) = NULL;
            return;
        }

        // The date/time variants use PHP's default time zone.
        timelib_tzinfo *tzinfo = get_timezone_info(TSRMLS_C);
#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 42
        UnicodeString tzstr = UnicodeString::fromUTF8(StringPiece(tzinfo->name));
#else
        UnicodeString tzstr = UnicodeString(tzinfo->name,
            strlen(tzinfo->name), US_INV);
#endif
        if (tzstr.isBogus()) {
            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
                "intlgregcal_create_instance: could not create UTF-8 string "
                "from PHP's default timezone name (see date_default_timezone_get())",
                0 TSRMLS_CC);
            delete gcal;
            Z_OBJ_P(return_value) = NULL;
            return;
        }

        TimeZone *tz = TimeZone::createTimeZone(tzstr);
        gcal->adoptTimeZone(tz);
    }

    Calendar_object *co = Z_INTL_CALENDAR_P(return_value);
    co->ucal = gcal;
}
void ICUServiceTest::testAPI_Two() { UErrorCode status = U_ZERO_ERROR; TestStringService service; service.registerFactory(new AnonymousStringFactory(), status); // anonymous factory will still handle the id { UErrorCode status = U_ZERO_ERROR; const UnicodeString en_US = "en_US"; UnicodeString* result = (UnicodeString*)service.get(en_US, status); confirmEqual("21) locale", result, &en_US); delete result; } // still normalizes id { UErrorCode status = U_ZERO_ERROR; const UnicodeString en_US_BAR = "en_US_BAR"; UnicodeString resultID; UnicodeString* result = (UnicodeString*)service.get("EN_us_bar", &resultID, status); confirmEqual("22) locale", &resultID, &en_US_BAR); delete result; } // we can override for particular ids UnicodeString* singleton0 = new UnicodeString("Zero"); service.registerInstance(singleton0, "en_US_BAR", status); { UErrorCode status = U_ZERO_ERROR; UnicodeString* result = (UnicodeString*)service.get("en_US_BAR", status); confirmEqual("23) override super", result, singleton0); delete result; } // empty service should not recognize anything service.reset(); { UErrorCode status = U_ZERO_ERROR; UnicodeString* result = (UnicodeString*)service.get("en_US", status); confirmIdentical("24) empty", result, NULL); } // create a custom multiple key factory { UnicodeString xids[] = { "en_US_VALLEY_GIRL", "en_US_VALLEY_BOY", "en_US_SURFER_GAL", "en_US_SURFER_DUDE" }; int32_t count = sizeof(xids)/sizeof(UnicodeString); ICUServiceFactory* f = new TestMultipleKeyStringFactory(xids, count, "Later"); service.registerFactory(f, status); } // iterate over the visual ids returned by the multiple factory { UErrorCode status = U_ZERO_ERROR; UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status); service.getVisibleIDs(ids, status); for (int i = 0; i < ids.size(); ++i) { const UnicodeString* id = (const UnicodeString*)ids[i]; UnicodeString* result = (UnicodeString*)service.get(*id, status); if (result) { logln(" " + *id + " --> " + *result); delete 
result; } else { errln("could not find " + *id); } } // four visible ids confirmIdentical("25) visible ids", ids.size(), 4); } // iterate over the display names { UErrorCode status = U_ZERO_ERROR; UVector names(status); service.getDisplayNames(names, status); for (int i = 0; i < names.size(); ++i) { const StringPair* pair = (const StringPair*)names[i]; logln(" " + pair->displayName + " --> " + pair->id); } confirmIdentical("26) display names", names.size(), 4); } // no valid display name { UnicodeString name; service.getDisplayName("en_US_VALLEY_GEEK", name); confirmBoolean("27) get display name", name.isBogus()); } { UnicodeString name; service.getDisplayName("en_US_SURFER_DUDE", name, Locale::getEnglish()); confirmStringsEqual("28) get display name", name, "English (United States, SURFER_DUDE)"); } // register another multiple factory { UnicodeString xids[] = { "en_US_SURFER", "en_US_SURFER_GAL", "en_US_SILICON", "en_US_SILICON_GEEK", }; int32_t count = sizeof(xids)/sizeof(UnicodeString); ICUServiceFactory* f = new TestMultipleKeyStringFactory(xids, count, "Rad dude"); service.registerFactory(f, status); } // this time, we have seven display names // Rad dude's surfer gal 'replaces' Later's surfer gal { UErrorCode status = U_ZERO_ERROR; UVector names(status); service.getDisplayNames(names, Locale("es"), status); for (int i = 0; i < names.size(); ++i) { const StringPair* pair = (const StringPair*)names[i]; logln(" " + pair->displayName + " --> " + pair->id); } confirmIdentical("29) display names", names.size(), 7); } // we should get the display name corresponding to the actual id // returned by the id we used. 
{ UErrorCode status = U_ZERO_ERROR; UnicodeString actualID; UnicodeString id = "en_us_surfer_gal"; UnicodeString* gal = (UnicodeString*)service.get(id, &actualID, status); if (gal != NULL) { UnicodeString displayName; logln("actual id: " + actualID); service.getDisplayName(actualID, displayName, Locale::getEnglish()); logln("found actual: " + *gal + " with display name: " + displayName); confirmBoolean("30) found display name for actual", !displayName.isBogus()); service.getDisplayName(id, displayName, Locale::getEnglish()); logln("found actual: " + *gal + " with display name: " + displayName); confirmBoolean("31) found display name for query", displayName.isBogus()); delete gal; } else { errln("30) service could not find entry for " + id); } } // this should be handled by the 'dude' factory, since it overrides en_US_SURFER. { UErrorCode status = U_ZERO_ERROR; UnicodeString actualID; UnicodeString id = "en_US_SURFER_BOZO"; UnicodeString* bozo = (UnicodeString*)service.get(id, &actualID, status); if (bozo != NULL) { UnicodeString displayName; service.getDisplayName(actualID, displayName, Locale::getEnglish()); logln("found actual: " + *bozo + " with display name: " + displayName); confirmBoolean("32) found display name for actual", !displayName.isBogus()); service.getDisplayName(id, displayName, Locale::getEnglish()); logln("found actual: " + *bozo + " with display name: " + displayName); confirmBoolean("33) found display name for query", displayName.isBogus()); delete bozo; } else { errln("32) service could not find entry for " + id); } } // certainly not default... { confirmBoolean("34) is default ", !service.isDefault()); } { UErrorCode status = U_ZERO_ERROR; UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status); service.getVisibleIDs(ids, status); for (int i = 0; i < ids.size(); ++i) { const UnicodeString* id = (const UnicodeString*)ids[i]; msgstr(*id + "? ", service.get(*id, status)); } logstr("valleygirl? 
", service.get("en_US_VALLEY_GIRL", status)); logstr("valleyboy? ", service.get("en_US_VALLEY_BOY", status)); logstr("valleydude? ", service.get("en_US_VALLEY_DUDE", status)); logstr("surfergirl? ", service.get("en_US_SURFER_GIRL", status)); } }
void LocaleDisplayNamesImpl::initialize(void) { LocaleDisplayNamesImpl *nonConstThis = (LocaleDisplayNamesImpl *)this; nonConstThis->locale = langData.getLocale() == Locale::getRoot() ? regionData.getLocale() : langData.getLocale(); UnicodeString sep; langData.getNoFallback("localeDisplayPattern", "separator", sep); if (sep.isBogus()) { sep = UnicodeString("{0}, {1}", -1, US_INV); } UErrorCode status = U_ZERO_ERROR; separatorFormat.applyPatternMinMaxArguments(sep, 2, 2, status); UnicodeString pattern; langData.getNoFallback("localeDisplayPattern", "pattern", pattern); if (pattern.isBogus()) { pattern = UnicodeString("{0} ({1})", -1, US_INV); } format.applyPatternMinMaxArguments(pattern, 2, 2, status); if (pattern.indexOf((UChar)0xFF08) >= 0) { formatOpenParen.setTo((UChar)0xFF08); // fullwidth ( formatReplaceOpenParen.setTo((UChar)0xFF3B); // fullwidth [ formatCloseParen.setTo((UChar)0xFF09); // fullwidth ) formatReplaceCloseParen.setTo((UChar)0xFF3D); // fullwidth ] } else { formatOpenParen.setTo((UChar)0x0028); // ( formatReplaceOpenParen.setTo((UChar)0x005B); // [ formatCloseParen.setTo((UChar)0x0029); // ) formatReplaceCloseParen.setTo((UChar)0x005D); // ] } UnicodeString ktPattern; langData.get("localeDisplayPattern", "keyTypePattern", ktPattern); if (ktPattern.isBogus()) { ktPattern = UnicodeString("{0}={1}", -1, US_INV); } keyTypeFormat.applyPatternMinMaxArguments(ktPattern, 2, 2, status); uprv_memset(fCapitalization, 0, sizeof(fCapitalization)); #if !UCONFIG_NO_BREAK_ITERATION // Only get the context data if we need it! This is a const object so we know now... 
// Also check whether we will need a break iterator (depends on the data) UBool needBrkIter = FALSE; if (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE) { LocalUResourceBundlePointer resource(ures_open(NULL, locale.getName(), &status)); if (U_FAILURE(status)) { return; } CapitalizationContextSink sink(*this); ures_getAllItemsWithFallback(resource.getAlias(), "contextTransforms", sink, status); if (status == U_MISSING_RESOURCE_ERROR) { // Silently ignore. Not every locale has contextTransforms. status = U_ZERO_ERROR; } else if (U_FAILURE(status)) { return; } needBrkIter = sink.hasCapitalizationUsage; } // Get a sentence break iterator if we will need it if (needBrkIter || capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE) { status = U_ZERO_ERROR; capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status); if (U_FAILURE(status)) { delete capitalizationBrkIter; capitalizationBrkIter = NULL; } } #endif }
/**
 * Resolves `tzid` to its CLDR canonical time zone ID.
 *
 * The result is looked up in gCanonicalIDCache first. On a miss it is
 * resolved from the key-type resource data (type map, then alias table,
 * then the Olson link target and its alias) and stored in the cache.
 *
 * @param tzid   time zone ID; bogus, longer than ZID_KEY_MAX, or containing
 *               non-invariant characters sets U_ILLEGAL_ARGUMENT_ERROR
 * @param status in/out error code
 * @return the canonical ID, or NULL on error
 */
const UChar* U_EXPORT2
ZoneMeta::getCanonicalCLDRID(const UnicodeString &tzid, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return NULL;
    }

    if (tzid.isBogus() || tzid.length() > ZID_KEY_MAX) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    // Checking the cached results
    umtx_initOnce(gCanonicalIDCacheInitOnce, &initCanonicalIDCache, status);
    if (U_FAILURE(status)) {
        return NULL;
    }

    const UChar *canonicalID = NULL;

    UErrorCode tmpStatus = U_ZERO_ERROR;
    UChar utzid[ZID_KEY_MAX + 1];
    tzid.extract(utzid, ZID_KEY_MAX + 1, tmpStatus);
    U_ASSERT(tmpStatus == U_ZERO_ERROR);    // we checked the length of tzid already

    if (!uprv_isInvariantUString(utzid, -1)) {
        // All of known tz IDs are only containing ASCII invariant characters.
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    // Check if it was already cached
    umtx_lock(gZoneMetaLock());
    {
        canonicalID = (const UChar *)uhash_get(gCanonicalIDCache, utzid);
    }
    umtx_unlock(gZoneMetaLock());

    if (canonicalID != NULL) {
        return canonicalID;
    }

    // If not, resolve CLDR canonical ID with resource data
    UBool isInputCanonical = FALSE;
    char id[ZID_KEY_MAX + 1];
    tzid.extract(0, 0x7fffffff, id, UPRV_LENGTHOF(id), US_INV);

    // replace '/' with ':' (test each character before advancing, so the
    // first character is examined too)
    for (char *p = id; *p != 0; ++p) {
        if (*p == '/') {
            *p = ':';
        }
    }

    UResourceBundle *top = ures_openDirect(NULL, gKeyTypeData, &tmpStatus);
    UResourceBundle *rb = ures_getByKey(top, gTypeMapTag, NULL, &tmpStatus);
    ures_getByKey(rb, gTimezoneTag, rb, &tmpStatus);
    ures_getByKey(rb, id, rb, &tmpStatus);
    if (U_SUCCESS(tmpStatus)) {
        // type entry (canonical) found
        // the input is the canonical ID. resolve to const UChar*
        canonicalID = TimeZone::findID(tzid);
        isInputCanonical = TRUE;
    }

    if (canonicalID == NULL) {
        // If a map element not found, then look for an alias
        tmpStatus = U_ZERO_ERROR;
        ures_getByKey(top, gTypeAliasTag, rb, &tmpStatus);
        ures_getByKey(rb, gTimezoneTag, rb, &tmpStatus);
        const UChar *canonical = ures_getStringByKey(rb,id,NULL,&tmpStatus);
        if (U_SUCCESS(tmpStatus)) {
            // canonical map found
            canonicalID = canonical;
        }

        if (canonicalID == NULL) {
            // Dereference the input ID using the tz data
            const UChar *derefer = TimeZone::dereferOlsonLink(tzid);
            if (derefer == NULL) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
            } else {
                int32_t len = u_strlen(derefer);
                if (len > ZID_KEY_MAX) {
                    // Does not fit in id[], so it cannot be used as an alias
                    // key; treat it like "no alias found" instead of
                    // overrunning the buffer.
                    canonicalID = derefer;
                    isInputCanonical = TRUE;
                } else {
                    u_UCharsToChars(derefer,id,len);
                    id[len] = (char) 0; // Make sure it is null terminated.

                    // replace '/' with ':'
                    for (char *q = id; *q != 0; ++q) {
                        if (*q == '/') {
                            *q = ':';
                        }
                    }

                    // If a dereference turned something up then look for an alias.
                    // rb still points to the alias table, so we don't have to go looking
                    // for it.
                    tmpStatus = U_ZERO_ERROR;
                    canonical = ures_getStringByKey(rb,id,NULL,&tmpStatus);
                    if (U_SUCCESS(tmpStatus)) {
                        // canonical map for the dereferenced ID found
                        canonicalID = canonical;
                    } else {
                        canonicalID = derefer;
                        isInputCanonical = TRUE;
                    }
                }
            }
        }
    }
    ures_close(rb);
    ures_close(top);

    if (U_SUCCESS(status)) {
        U_ASSERT(canonicalID != NULL);  // canonicalID must be non-NULL here

        // Put the resolved canonical ID to the cache
        umtx_lock(gZoneMetaLock());
        {
            // Re-check under the lock: another thread may have cached it meanwhile.
            const UChar* idInCache = (const UChar *)uhash_get(gCanonicalIDCache, utzid);
            if (idInCache == NULL) {
                const UChar* key = ZoneMeta::findTimeZoneID(tzid);
                U_ASSERT(key != NULL);
                if (key != NULL) {
                    idInCache = (const UChar *)uhash_put(gCanonicalIDCache, (void *)key, (void *)canonicalID, &status);
                    U_ASSERT(idInCache == NULL);
                }
            }
            if (U_SUCCESS(status) && isInputCanonical) {
                // Also put canonical ID itself into the cache if not exist
                const UChar *canonicalInCache = (const UChar*)uhash_get(gCanonicalIDCache, canonicalID);
                if (canonicalInCache == NULL) {
                    canonicalInCache = (const UChar *)uhash_put(gCanonicalIDCache, (void *)canonicalID, (void *)canonicalID, &status);
                    U_ASSERT(canonicalInCache == NULL);
                }
            }
        }
        umtx_unlock(gZoneMetaLock());
    }

    return canonicalID;
}
/**
 * Builds the display name for `locale` into `result`.
 *
 * The name has a primary part (the language, or with ULDN_DIALECT_NAMES the
 * longest of language_script_country / language_script / language_country
 * that localeIdName() knows) and a remainder made of the script, country,
 * variant and keyword names not already covered, joined by appendWithSep().
 * When the remainder is non-empty, the two are combined through `format`.
 *
 * @param locale locale to name; a bogus locale sets `result` to bogus
 * @param result receives the display name
 * @return the value of adjustForUsageAndContext() on `result`, or `result`
 *         itself on the early-exit paths
 */
UnicodeString&
LocaleDisplayNamesImpl::localeDisplayName(const Locale& locale,
                                          UnicodeString& result) const {
    if (locale.isBogus()) {
        result.setToBogus();
        return result;
    }

    UnicodeString primaryName;

    const char* lang = locale.getLanguage();
    if (uprv_strlen(lang) == 0) {
        lang = "root";
    }
    const char* script = locale.getScript();
    const char* country = locale.getCountry();
    const char* variant = locale.getVariant();

    // Each flag is cleared once the primary name already covers that field.
    UBool hasScript = uprv_strlen(script) > 0;
    UBool hasCountry = uprv_strlen(country) > 0;
    UBool hasVariant = uprv_strlen(variant) > 0;

    if (dialectHandling == ULDN_DIALECT_NAMES) {
        char buffer[ULOC_FULLNAME_CAPACITY];
        UBool matched = FALSE;

        if (hasScript && hasCountry) {
            ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, "_", country, (char *)0);
            localeIdName(buffer, primaryName);
            if (!primaryName.isBogus()) {
                hasScript = FALSE;
                hasCountry = FALSE;
                matched = TRUE;
            }
        }
        if (!matched && hasScript) {
            ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, (char *)0);
            localeIdName(buffer, primaryName);
            if (!primaryName.isBogus()) {
                hasScript = FALSE;
                matched = TRUE;
            }
        }
        if (!matched && hasCountry) {
            ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", country, (char*)0);
            localeIdName(buffer, primaryName);
            if (!primaryName.isBogus()) {
                hasCountry = FALSE;
            }
        }
    }
    if (primaryName.isBogus() || primaryName.isEmpty()) {
        localeIdName(lang, primaryName);
    }

    UnicodeString remainder;
    UnicodeString piece;
    UErrorCode status = U_ZERO_ERROR;

    if (hasScript) {
        remainder.append(scriptDisplayName(script, piece, TRUE));
    }
    if (hasCountry) {
        appendWithSep(remainder, regionDisplayName(country, piece, TRUE));
    }
    if (hasVariant) {
        appendWithSep(remainder, variantDisplayName(variant, piece, TRUE));
    }
    // The remainder is presumably wrapped in parentheses by `format`, so
    // parentheses inside it are swapped for the replacement characters.
    remainder.findAndReplace(formatOpenParen, formatReplaceOpenParen);
    remainder.findAndReplace(formatCloseParen, formatReplaceCloseParen);

    LocalPointer<StringEnumeration> keywords(locale.createKeywords(status));
    if (keywords.isValid() && U_SUCCESS(status)) {
        UnicodeString valueName;
        char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY
        for (;;) {
            const char* key = keywords->next((int32_t *)0, status);
            if (key == NULL) {
                break;
            }
            locale.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
            if (U_FAILURE(status)) {
                return result;
            }

            keyDisplayName(key, piece, TRUE);
            piece.findAndReplace(formatOpenParen, formatReplaceOpenParen);
            piece.findAndReplace(formatCloseParen, formatReplaceCloseParen);

            keyValueDisplayName(key, value, valueName, TRUE);
            valueName.findAndReplace(formatOpenParen, formatReplaceOpenParen);
            valueName.findAndReplace(formatCloseParen, formatReplaceCloseParen);

            if (valueName != UnicodeString(value, -1, US_INV)) {
                // the value has a display name of its own
                appendWithSep(remainder, valueName);
            } else if (piece != UnicodeString(key, -1, US_INV)) {
                // only the key has a display name: combine via the key-type pattern
                UnicodeString combined;
                keyTypeFormat.format(piece, valueName, combined, status);
                appendWithSep(remainder, combined);
            } else {
                // neither has a display name: emit key=value
                appendWithSep(remainder, piece)
                    .append((UChar)0x3d /* = */)
                    .append(valueName);
            }
        }
    }

    if (remainder.isEmpty()) {
        result = primaryName;
    } else {
        format.format(primaryName, remainder, result.remove(), status);
    }
    return adjustForUsageAndContext(kCapContextUsageLanguage, result);
}
void UTS46Test::TestAPI() { UErrorCode errorCode=U_ZERO_ERROR; UnicodeString result; IDNAInfo info; UnicodeString input=UNICODE_STRING_SIMPLE("www.eXample.cOm"); UnicodeString expected=UNICODE_STRING_SIMPLE("www.example.com"); trans->nameToASCII(input, result, info, errorCode); if(U_FAILURE(errorCode) || info.hasErrors() || result!=expected) { errln("T.nameToASCII(www.example.com) info.errors=%04lx result matches=%d %s", (long)info.getErrors(), result==expected, u_errorName(errorCode)); } errorCode=U_USELESS_COLLATOR_ERROR; trans->nameToUnicode(input, result, info, errorCode); if(errorCode!=U_USELESS_COLLATOR_ERROR || !result.isBogus()) { errln("T.nameToUnicode(U_FAILURE) did not preserve the errorCode " "or not result.setToBogus() - %s", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; input.setToBogus(); result=UNICODE_STRING_SIMPLE("quatsch"); nontrans->labelToASCII(input, result, info, errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || !result.isBogus()) { errln("N.labelToASCII(bogus) did not set illegal-argument-error " "or not result.setToBogus() - %s", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; input=UNICODE_STRING_SIMPLE("xn--bcher.de-65a"); expected=UNICODE_STRING_SIMPLE("xn--bcher\\uFFFDde-65a").unescape(); nontrans->labelToASCII(input, result, info, errorCode); if( U_FAILURE(errorCode) || info.getErrors()!=(UIDNA_ERROR_LABEL_HAS_DOT|UIDNA_ERROR_INVALID_ACE_LABEL) || result!=expected ) { errln("N.labelToASCII(label-with-dot) failed with errors %04lx - %s", info.getErrors(), u_errorName(errorCode)); } // UTF-8 char buffer[100]; TestCheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer)); errorCode=U_ZERO_ERROR; nontrans->labelToUnicodeUTF8(StringPiece(NULL, 5), sink, info, errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || sink.NumberOfBytesWritten()!=0) { errln("N.labelToUnicodeUTF8(StringPiece(NULL, 5)) did not set illegal-argument-error ", "or did output something - %s", u_errorName(errorCode)); } sink.Reset(); errorCode=U_ZERO_ERROR; 
nontrans->nameToASCII_UTF8(StringPiece(), sink, info, errorCode); if(U_FAILURE(errorCode) || sink.NumberOfBytesWritten()!=0 || !sink.calledFlush) { errln("N.nameToASCII_UTF8(empty) failed - %s", u_errorName(errorCode)); } static const char s[]={ 0x61, (char)0xc3, (char)0x9f }; sink.Reset(); errorCode=U_USELESS_COLLATOR_ERROR; nontrans->nameToUnicodeUTF8(StringPiece(s, 3), sink, info, errorCode); if(errorCode!=U_USELESS_COLLATOR_ERROR || sink.NumberOfBytesWritten()!=0) { errln("N.nameToUnicode_UTF8(U_FAILURE) did not preserve the errorCode " "or did output something - %s", u_errorName(errorCode)); } sink.Reset(); errorCode=U_ZERO_ERROR; trans->labelToUnicodeUTF8(StringPiece(s, 3), sink, info, errorCode); if( U_FAILURE(errorCode) || sink.NumberOfBytesWritten()!=3 || buffer[0]!=0x61 || buffer[1]!=0x73 || buffer[2]!=0x73 || !sink.calledFlush ) { errln("T.labelToUnicodeUTF8(a sharp-s) failed - %s", u_errorName(errorCode)); } sink.Reset(); errorCode=U_ZERO_ERROR; // "eXampLe.cOm" static const char eX[]={ 0x65, 0x58, 0x61, 0x6d, 0x70, 0x4c, 0x65, 0x2e, 0x63, 0x4f, 0x6d, 0 }; // "example.com" static const char ex[]={ 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x2e, 0x63, 0x6f, 0x6d }; trans->nameToUnicodeUTF8(eX, sink, info, errorCode); if( U_FAILURE(errorCode) || sink.NumberOfBytesWritten()!=11 || 0!=memcmp(ex, buffer, 11) || !sink.calledFlush ) { errln("T.nameToUnicodeUTF8(eXampLe.cOm) failed - %s", u_errorName(errorCode)); } }
/** Returns true when `prefix` is not bogus. */
inline UBool isSet() const {
    const UBool prefixIsBogus = prefix.isBogus();
    return !prefixIsBogus;
}
bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign, UErrorCode&) const { if (result.seenNumber() && exponentSign == 0) { // A number has already been consumed. return false; } else if (exponentSign != 0) { // scientific notation always comes after the number U_ASSERT(!result.quantity.bogus); } // Initial offset before any character consumption. int32_t initialOffset = segment.getOffset(); // Return value: whether to ask for more characters. bool maybeMore = false; // All digits consumed so far. number::impl::DecimalQuantity digitsConsumed; digitsConsumed.bogus = true; // The total number of digits after the decimal place, used for scaling the result. int32_t digitsAfterDecimalPlace = 0; // The actual grouping and decimal separators used in the string. // If non-null, we have seen that token. UnicodeString actualGroupingString; UnicodeString actualDecimalString; actualGroupingString.setToBogus(); actualDecimalString.setToBogus(); // Information for two groups: the previous group and the current group. // // Each group has three pieces of information: // // Offset: the string position of the beginning of the group, including a leading separator // if there was a leading separator. This is needed in case we need to rewind the parse to // that position. // // Separator type: // 0 => beginning of string // 1 => lead separator is a grouping separator // 2 => lead separator is a decimal separator // // Count: the number of digits in the group. If -1, the group has been validated. int32_t currGroupOffset = 0; int32_t currGroupSepType = 0; int32_t currGroupCount = 0; int32_t prevGroupOffset = -1; int32_t prevGroupSepType = -1; int32_t prevGroupCount = -1; while (segment.length() > 0) { maybeMore = false; // Attempt to match a digit. int8_t digit = -1; // Try by code point digit value. 
UChar32 cp = segment.getCodePoint(); if (u_isdigit(cp)) { segment.adjustOffset(U16_LENGTH(cp)); digit = static_cast<int8_t>(u_digit(cp, 10)); } // Try by digit string. if (digit == -1 && !fLocalDigitStrings.isNull()) { for (int32_t i = 0; i < 10; i++) { const UnicodeString& str = fLocalDigitStrings[i]; if (str.isEmpty()) { continue; } int32_t overlap = segment.getCommonPrefixLength(str); if (overlap == str.length()) { segment.adjustOffset(overlap); digit = static_cast<int8_t>(i); break; } maybeMore = maybeMore || (overlap == segment.length()); } } if (digit >= 0) { // Digit was found. if (digitsConsumed.bogus) { digitsConsumed.bogus = false; digitsConsumed.clear(); } digitsConsumed.appendDigit(digit, 0, true); currGroupCount++; if (!actualDecimalString.isBogus()) { digitsAfterDecimalPlace++; } continue; } // Attempt to match a literal grouping or decimal separator. bool isDecimal = false; bool isGrouping = false; // 1) Attempt the decimal separator string literal. // if (we have not seen a decimal separator yet) { ... } if (actualDecimalString.isBogus() && !decimalSeparator.isEmpty()) { int32_t overlap = segment.getCommonPrefixLength(decimalSeparator); maybeMore = maybeMore || (overlap == segment.length()); if (overlap == decimalSeparator.length()) { isDecimal = true; actualDecimalString = decimalSeparator; } } // 2) Attempt to match the actual grouping string literal. if (!actualGroupingString.isBogus()) { int32_t overlap = segment.getCommonPrefixLength(actualGroupingString); maybeMore = maybeMore || (overlap == segment.length()); if (overlap == actualGroupingString.length()) { isGrouping = true; } } // 2.5) Attempt to match a new the grouping separator string literal. // if (we have not seen a grouping or decimal separator yet) { ... 
} if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus() && !groupingSeparator.isEmpty()) { int32_t overlap = segment.getCommonPrefixLength(groupingSeparator); maybeMore = maybeMore || (overlap == segment.length()); if (overlap == groupingSeparator.length()) { isGrouping = true; actualGroupingString = groupingSeparator; } } // 3) Attempt to match a decimal separator from the equivalence set. // if (we have not seen a decimal separator yet) { ... } // The !isGrouping is to confirm that we haven't yet matched the current character. if (!isGrouping && actualDecimalString.isBogus()) { if (decimalUniSet->contains(cp)) { isDecimal = true; actualDecimalString = UnicodeString(cp); } } // 4) Attempt to match a grouping separator from the equivalence set. // if (we have not seen a grouping or decimal separator yet) { ... } if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus()) { if (groupingUniSet->contains(cp)) { isGrouping = true; actualGroupingString = UnicodeString(cp); } } // Leave if we failed to match this as a separator. if (!isDecimal && !isGrouping) { break; } // Check for conditions when we don't want to accept the separator. if (isDecimal && integerOnly) { break; } else if (currGroupSepType == 2 && isGrouping) { // Fraction grouping break; } // Validate intermediate grouping sizes. bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false); bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true); if (!prevValidSecondary || (isDecimal && !currValidPrimary)) { // Invalid grouping sizes. 
if (isGrouping && currGroupCount == 0) { // Trailing grouping separators: these are taken care of below U_ASSERT(currGroupSepType == 1); } else if (requireGroupingMatch) { // Strict mode: reject the parse digitsConsumed.clear(); digitsConsumed.bogus = true; } break; } else if (requireGroupingMatch && currGroupCount == 0 && currGroupSepType == 1) { break; } else { // Grouping sizes OK so far. prevGroupOffset = currGroupOffset; prevGroupCount = currGroupCount; if (isDecimal) { // Do not validate this group any more. prevGroupSepType = -1; } else { prevGroupSepType = currGroupSepType; } } // OK to accept the separator. // Special case: don't update currGroup if it is empty; this allows two grouping // separators in a row in lenient mode. if (currGroupCount != 0) { currGroupOffset = segment.getOffset(); } currGroupSepType = isGrouping ? 1 : 2; currGroupCount = 0; if (isGrouping) { segment.adjustOffset(actualGroupingString.length()); } else { segment.adjustOffset(actualDecimalString.length()); } } // End of main loop. // Back up if there was a trailing grouping separator. // Shift prev -> curr so we can check it as a final group. if (currGroupSepType != 2 && currGroupCount == 0) { maybeMore = true; segment.setOffset(currGroupOffset); currGroupOffset = prevGroupOffset; currGroupSepType = prevGroupSepType; currGroupCount = prevGroupCount; prevGroupOffset = -1; prevGroupSepType = 0; prevGroupCount = 1; } // Validate final grouping sizes. bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false); bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true); if (!requireGroupingMatch) { // The cases we need to handle here are lone digits. 
// Examples: "1,1" "1,1," "1,1,1" "1,1,1," ",1" (all parse as 1) // See more examples in numberformattestspecification.txt int32_t digitsToRemove = 0; if (!prevValidSecondary) { segment.setOffset(prevGroupOffset); digitsToRemove += prevGroupCount; digitsToRemove += currGroupCount; } else if (!currValidPrimary && (prevGroupSepType != 0 || prevGroupCount != 0)) { maybeMore = true; segment.setOffset(currGroupOffset); digitsToRemove += currGroupCount; } if (digitsToRemove != 0) { digitsConsumed.adjustMagnitude(-digitsToRemove); digitsConsumed.truncate(); } prevValidSecondary = true; currValidPrimary = true; } if (currGroupSepType != 2 && (!prevValidSecondary || !currValidPrimary)) { // Grouping failure. digitsConsumed.bogus = true; } // Strings that start with a separator but have no digits, // or strings that failed a grouping size check. if (digitsConsumed.bogus) { maybeMore = maybeMore || (segment.length() == 0); segment.setOffset(initialOffset); return maybeMore; } // We passed all inspections. Start post-processing. // Adjust for fraction part. digitsConsumed.adjustMagnitude(-digitsAfterDecimalPlace); // Set the digits, either normal or exponent. if (exponentSign != 0 && segment.getOffset() != initialOffset) { bool overflow = false; if (digitsConsumed.fitsInLong()) { int64_t exponentLong = digitsConsumed.toLong(false); U_ASSERT(exponentLong >= 0); if (exponentLong <= INT32_MAX) { auto exponentInt = static_cast<int32_t>(exponentLong); if (result.quantity.adjustMagnitude(exponentSign * exponentInt)) { overflow = true; } } else { overflow = true; } } else { overflow = true; } if (overflow) { if (exponentSign == -1) { // Set to zero result.quantity.clear(); } else { // Set to infinity result.quantity.bogus = true; result.flags |= FLAG_INFINITY; } } } else { result.quantity = digitsConsumed; } // Set other information into the result and return. 
if (!actualDecimalString.isBogus()) { result.flags |= FLAG_HAS_DECIMAL_SEPARATOR; } result.setCharsConsumed(segment); return segment.length() == 0 || maybeMore; }
/* {{{ timezone_convert_to_datetimezone
 * Builds a PHP DateTimeZone object in *ret from an ICU TimeZone.
 *
 * timeZone       source ICU time zone; its ID decides how the object is built
 * outside_error  receives the error code and message on failure
 * func           name of the calling function, used as message prefix
 * ret            zval that receives the DateTimeZone object
 *
 * Returns ret on success. On failure the error is recorded in outside_error,
 * ret (if non-NULL) is destroyed and NULL is returned.
 */
U_CFUNC zval *timezone_convert_to_datetimezone(const TimeZone *timeZone,
											   intl_error *outside_error,
											   const char *func, zval *ret)
{
	UnicodeString	id;
	char			*message = NULL;
	bool			failed = false;

	timeZone->getID(id);
	if (id.isBogus()) {
		spprintf(&message, 0, "%s: could not obtain TimeZone id", func);
		intl_errors_set(outside_error, U_ILLEGAL_ARGUMENT_ERROR,
			message, 1);
		failed = true;
	} else {
		object_init_ex(ret, php_date_get_timezone_ce());
		php_timezone_obj *tzobj = Z_PHPTIMEZONE_P(ret);

		if (id.compare(0, 3, UnicodeString("GMT", sizeof("GMT")-1, US_INV)) == 0) {
			/* The DateTimeZone constructor doesn't support offset time zones,
			 * so the DateTimeZone structure is filled in by hand. */
			tzobj->initialized = 1;
			tzobj->type = TIMELIB_ZONETYPE_OFFSET;
			/* convert offset from milliseconds to minutes */
			tzobj->tzi.utc_offset = -1 * timeZone->getRawOffset() / (60 * 1000);
		} else {
			char	*str;
			size_t	str_len;

			if (intl_charFromString(id, &str, &str_len,
					&INTL_ERROR_CODE(*outside_error)) == FAILURE) {
				spprintf(&message, 0, "%s: could not convert id to UTF-8", func);
				intl_errors_set(outside_error, INTL_ERROR_CODE(*outside_error),
					message, 1);
				failed = true;
			} else {
				zval arg;

				ZVAL_STRINGL(&arg, str, str_len);
				/* str is released here while arg is still used below, so arg
				 * presumably holds its own copy of the bytes. */
				efree(str);

				/* Call the constructor! */
				zend_call_method_with_1_params(ret, NULL, NULL, "__construct", NULL, &arg);
				if (EG(exception)) {
					spprintf(&message, 0,
						"%s: DateTimeZone constructor threw exception", func);
					intl_errors_set(outside_error, U_ILLEGAL_ARGUMENT_ERROR,
						message, 1);
					zend_object_store_ctor_failed(Z_OBJ_P(ret));
					failed = true;
				}
				zval_ptr_dtor(&arg);
			}
		}
	}

	if (failed) {
		if (ret) {
			zval_ptr_dtor(ret);
		}
		ret = NULL;
	}

	if (message) {
		efree(message);
	}
	return ret;
}
/**
 * Fills `result` with StringPair objects built from every entry of the
 * display-name cache for `locale`, optionally restricted to IDs for which
 * the key created from `matchID` is a fallback.
 *
 * The display-name cache (dnCache) holds the names for one locale at a time;
 * it is discarded and rebuilt from the visible-ID map when `locale` differs
 * from the cached locale. The service lock is held for the whole call,
 * including the iteration over the cache, because a concurrent call for a
 * different locale deletes dnCache.
 *
 * @param result  Emptied first; owns its elements via userv_deleteStringPair.
 * @param locale  Locale in which the display names are requested.
 * @param matchID If non-NULL, only IDs matching its key are returned.
 * @param status  In/out error code. If it is a failure on entry, or becomes
 *                one, `result` is returned empty.
 * @return `result`.
 */
UVector&
ICUService::getDisplayNames(UVector& result,
                            const Locale& locale,
                            const UnicodeString* matchID,
                            UErrorCode& status) const
{
    result.removeAllElements();
    result.setDeleter(userv_deleteStringPair);
    if (U_FAILURE(status)) {
        // Nothing was built, so dnCache may be NULL; do not touch it.
        return result;
    }

    ICUService* ncthis = (ICUService*)this; // cast away semantic const
    Mutex mutex(&lock);

    if (dnCache != NULL && dnCache->locale != locale) {
        delete dnCache;
        ncthis->dnCache = NULL;
    }

    if (dnCache == NULL) {
        const Hashtable* m = getVisibleIDMap(status);
        if (U_FAILURE(status)) {
            return result;
        }
        ncthis->dnCache = new DNCache(locale);
        if (dnCache == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return result;
        }

        int32_t pos = UHASH_FIRST;
        const UHashElement* entry = NULL;
        while ((entry = m->nextElement(pos)) != NULL) {
            const UnicodeString* id = (const UnicodeString*)entry->key.pointer;
            ICUServiceFactory* f = (ICUServiceFactory*)entry->value.pointer;
            UnicodeString dname;
            f->getDisplayName(*id, locale, dname);
            if (dname.isBogus()) {
                status = U_MEMORY_ALLOCATION_ERROR;
            } else {
                dnCache->cache.put(dname, (void*)id, status); // share pointer with visibleIDMap
                if (U_SUCCESS(status)) {
                    continue;
                }
            }
            // Any failure while filling the cache discards the partial cache.
            delete dnCache;
            ncthis->dnCache = NULL;
            return result;
        }
    }

    ICUServiceKey* matchKey = createKey(matchID, status);
    /* To ensure that all elements in the hashtable are iterated, set pos to -1.
     * nextElement(pos) will skip the position at pos and begin the iteration
     * at the next position, which in this case will be 0.
     */
    int32_t pos = UHASH_FIRST;
    const UHashElement *entry = NULL;
    while ((entry = dnCache->cache.nextElement(pos)) != NULL) {
        const UnicodeString* id = (const UnicodeString*)entry->value.pointer;
        if (matchKey != NULL && !matchKey->isFallbackOf(*id)) {
            continue;
        }
        const UnicodeString* dn = (const UnicodeString*)entry->key.pointer;
        StringPair* sp = StringPair::create(*id, *dn, status);
        if (U_SUCCESS(status)) {
            result.addElement(sp, status);
        }
        if (U_FAILURE(status)) {
            // The pair was not taken over by the vector; free it here, the same
            // way getVisibleIDs() frees its element when addElement() fails.
            delete sp;
            result.removeAllElements();
            break;
        }
    }
    delete matchKey;

    return result;
}
void LocaleDisplayNamesImpl::initialize(void) { LocaleDisplayNamesImpl *nonConstThis = (LocaleDisplayNamesImpl *)this; nonConstThis->locale = langData.getLocale() == Locale::getRoot() ? regionData.getLocale() : langData.getLocale(); UnicodeString sep; langData.getNoFallback("localeDisplayPattern", "separator", sep); if (sep.isBogus()) { sep = UnicodeString("{0}, {1}", -1, US_INV); } UErrorCode status = U_ZERO_ERROR; separatorFormat.applyPatternMinMaxArguments(sep, 2, 2, status); UnicodeString pattern; langData.getNoFallback("localeDisplayPattern", "pattern", pattern); if (pattern.isBogus()) { pattern = UnicodeString("{0} ({1})", -1, US_INV); } format.applyPatternMinMaxArguments(pattern, 2, 2, status); if (pattern.indexOf((UChar)0xFF08) >= 0) { formatOpenParen.setTo((UChar)0xFF08); // fullwidth ( formatReplaceOpenParen.setTo((UChar)0xFF3B); // fullwidth [ formatCloseParen.setTo((UChar)0xFF09); // fullwidth ) formatReplaceCloseParen.setTo((UChar)0xFF3D); // fullwidth ] } else { formatOpenParen.setTo((UChar)0x0028); // ( formatReplaceOpenParen.setTo((UChar)0x005B); // [ formatCloseParen.setTo((UChar)0x0029); // ) formatReplaceCloseParen.setTo((UChar)0x005D); // ] } UnicodeString ktPattern; langData.get("localeDisplayPattern", "keyTypePattern", ktPattern); if (ktPattern.isBogus()) { ktPattern = UnicodeString("{0}={1}", -1, US_INV); } keyTypeFormat.applyPatternMinMaxArguments(ktPattern, 2, 2, status); uprv_memset(fCapitalization, 0, sizeof(fCapitalization)); #if !UCONFIG_NO_BREAK_ITERATION // The following is basically copied from DateFormatSymbols::initializeData typedef struct { const char * usageName; LocaleDisplayNamesImpl::CapContextUsage usageEnum; } ContextUsageNameToEnum; const ContextUsageNameToEnum contextUsageTypeMap[] = { // Entries must be sorted by usageTypeName; entry with NULL name terminates list. 
{ "key", kCapContextUsageKey }, { "keyValue", kCapContextUsageKeyValue }, { "languages", kCapContextUsageLanguage }, { "script", kCapContextUsageScript }, { "territory", kCapContextUsageTerritory }, { "variant", kCapContextUsageVariant }, { NULL, (CapContextUsage)0 }, }; // Only get the context data if we need it! This is a const object so we know now... // Also check whether we will need a break iterator (depends on the data) UBool needBrkIter = FALSE; if (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE) { int32_t len = 0; UResourceBundle *localeBundle = ures_open(NULL, locale.getName(), &status); if (U_SUCCESS(status)) { UResourceBundle *contextTransforms = ures_getByKeyWithFallback(localeBundle, "contextTransforms", NULL, &status); if (U_SUCCESS(status)) { UResourceBundle *contextTransformUsage; while ( (contextTransformUsage = ures_getNextResource(contextTransforms, NULL, &status)) != NULL ) { const int32_t * intVector = ures_getIntVector(contextTransformUsage, &len, &status); if (U_SUCCESS(status) && intVector != NULL && len >= 2) { const char* usageKey = ures_getKey(contextTransformUsage); if (usageKey != NULL) { const ContextUsageNameToEnum * typeMapPtr = contextUsageTypeMap; int32_t compResult = 0; // linear search; list is short and we cannot be sure that bsearch is available while ( typeMapPtr->usageName != NULL && (compResult = uprv_strcmp(usageKey, typeMapPtr->usageName)) > 0 ) { ++typeMapPtr; } if (typeMapPtr->usageName != NULL && compResult == 0) { int32_t titlecaseInt = (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU)? 
intVector[0]: intVector[1]; if (titlecaseInt != 0) { fCapitalization[typeMapPtr->usageEnum] = TRUE;; needBrkIter = TRUE; } } } } status = U_ZERO_ERROR; ures_close(contextTransformUsage); } ures_close(contextTransforms); } ures_close(localeBundle); } } // Get a sentence break iterator if we will need it if (needBrkIter || capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE) { status = U_ZERO_ERROR; capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status); if (U_FAILURE(status)) { delete capitalizationBrkIter; capitalizationBrkIter = NULL; } } #endif }
/**
 * Checks whether the NFD decomposition of `comp` can be matched, in order,
 * against code points of `segment` starting at `segmentPos` (other code
 * points may be interleaved), and if so computes the equivalents of what is
 * left over.
 *
 * @param fillinResult Hashtable that receives the result.
 * @param comp         Candidate composite code point.
 * @param segment      Text being examined.
 * @param segLen       Length of `segment` in UChars.
 * @param segmentPos   Index in `segment` where matching starts.
 * @param status       In/out error code.
 * @return `fillinResult` with a single empty-string entry when nothing is
 *         left over; the result of getEquivalents2() for the remainder
 *         otherwise; NULL on error, when the decomposition is not fully
 *         matched, or when the rebuilt string does not normalize back to the
 *         examined part of the segment.
 */
Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return NULL;
    }

    // `candidate` starts out as comp and collects the unmatched code points.
    UnicodeString candidate(comp);
    const int32_t compLen = candidate.length();

    UnicodeString decomposed;
    nfd.normalize(candidate, decomposed, status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    if (decomposed.isBogus()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    const UChar *decompChars = decomposed.getBuffer();
    const int32_t decompLimit = decomposed.length();

    // The next decomposition code point we are looking for in the segment.
    int32_t decompIndex = 0;
    UChar32 wanted;
    U16_NEXT(decompChars, decompIndex, decompLimit, wanted);

    UBool matchedAll = FALSE;
    for (int32_t segIndex = segmentPos; segIndex < segLen && !matchedAll; ) {
        UChar32 segCp;
        U16_NEXT(segment, segIndex, segLen, segCp);
        if (segCp != wanted) {
            // Brute force: anything that does not match is carried over.
            // (Possible optimization, not implemented: combining classes rise
            // monotonically after a zero, so some mismatches could fail early.)
            candidate.append(segCp);
        } else if (decompIndex == decompLimit) {
            // That was the last decomposition code point: take the rest of the
            // segment as is and stop.
            candidate.append(segment + segIndex, segLen - segIndex);
            matchedAll = TRUE;
        } else {
            U16_NEXT(decompChars, decompIndex, decompLimit, wanted);
        }
    }
    if (!matchedAll) {
        return NULL; // decomposition characters were left unmatched
    }

    if (candidate.length() == compLen) {
        // Success with no remainder.
        fillinResult->put(UnicodeString(), new UnicodeString(), status);
        return fillinResult;
    }

    // Brute force again: the rebuilt string must be canonically equivalent to
    // the part of the segment that was examined.
    UnicodeString renormalized;
    nfd.normalize(candidate, renormalized, status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    if (renormalized.compare(segment + segmentPos, segLen - segmentPos) != 0) {
        return NULL;
    }

    const int32_t remainderLen = candidate.length() - compLen;
    return getEquivalents2(fillinResult, candidate.getBuffer() + compLen, remainderLen, status);
}
/// \brief Parse a string containing index indicators into an IndexedString.
///
/// Recognized sequences:
///  - "@[name]" opens the index "name" at the current output position,
///  - "@[]"     closes the most recently opened index,
///  - "@[["     is an escape that produces a literal "@[".
/// All indicators are removed from the resulting text; each closed index is
/// recorded as a Needle holding its start and end positions, measured in code
/// points of the cleaned text. Indices still open at the end of the string are
/// not recorded.
///
/// \param String the text to parse.
/// \return the parsed IndexedString, or an unassigned Maybe if String is
///         bogus, an indicator has no terminating "]", or a closing sequence
///         appears with no index open.
///
seec::Maybe<IndexedString> IndexedString::from(UnicodeString const &String)
{
  if (String.isBogus())
    return seec::Maybe<IndexedString>();

  UnicodeString const NeedleStart("@[");
  UnicodeString const NeedleEscape("@[[");
  UnicodeString const NeedleEnd("]");

  UnicodeString CleanedString; // String with index indicators removed.

  std::multimap<UnicodeString, Needle> Needles;

  // Name and start position of every index that is currently open.
  std::vector<std::pair<UnicodeString, int32_t>> IndexStack;

  int32_t SearchFrom = 0; // Current offset in String.
  int32_t FoundStart; // Position of matched index indicator.

  while ((FoundStart = String.indexOf(NeedleStart, SearchFrom)) != -1) {
    // Copy all the literal string data.
    CleanedString.append(String, SearchFrom, FoundStart - SearchFrom);

    // Check if this is an escape sequence.
    if (String.compare(FoundStart, NeedleEscape.length(), NeedleEscape) == 0) {
      CleanedString.append(NeedleStart);
      SearchFrom = FoundStart + NeedleEscape.length();
      continue;
    }

    // Find the end of this sequence. The search must begin after the "@["
    // that was just matched: starting at SearchFrom would let a "]" in the
    // preceding literal text be mistaken for this sequence's terminator.
    int32_t const ContentStart = FoundStart + NeedleStart.length();
    int32_t const FoundEnd = String.indexOf(NeedleEnd, ContentStart);
    if (FoundEnd == -1)
      return seec::Maybe<IndexedString>();

    if (FoundEnd == ContentStart) {
      // This is a closing sequence.
      if (IndexStack.size() == 0)
        return seec::Maybe<IndexedString>();

      // Pop the starting details of the last-opened sequence.
      auto const Start = IndexStack.back();
      IndexStack.pop_back();

      // Store the needle for this sequence.
      Needles.insert(std::make_pair(Start.first,
                                    Needle(Start.second,
                                           CleanedString.countChar32())));
    }
    else {
      // This is an opening sequence.
      int32_t const NameLength = FoundEnd - ContentStart;

      IndexStack.emplace_back(UnicodeString(String, ContentStart, NameLength),
                              CleanedString.countChar32());
    }

    SearchFrom = FoundEnd + NeedleEnd.length();
  }

  // Copy all remaining literal data.
  CleanedString.append(String, SearchFrom, String.length() - SearchFrom);

  return IndexedString(std::move(CleanedString), std::move(Needles));
}
/**
 * Builds the display name of `locale` into `result`.
 *
 * The main name comes from the language; with ULDN_DIALECT_NAMES the longest
 * available combination of language, script and country is tried first, and
 * the subtags it covers are not repeated. Remaining script, country, variant
 * and keyword names are joined into a remainder, and `format` combines the
 * main name and the remainder. Without a remainder the main name is the
 * result.
 *
 * @param locale The locale to describe.
 * @param result Receives the display name.
 * @return `result`.
 */
UnicodeString&
LocaleDisplayNamesImpl::localeDisplayName(const Locale& locale,
                                          UnicodeString& result) const {
  UnicodeString resultName;

  const char* lang = locale.getLanguage();
  if (uprv_strlen(lang) == 0) {
    lang = "root";
  }
  const char* script = locale.getScript();
  const char* country = locale.getCountry();
  const char* variant = locale.getVariant();

  UBool hasScript = uprv_strlen(script) > 0;
  UBool hasCountry = uprv_strlen(country) > 0;
  UBool hasVariant = uprv_strlen(variant) > 0;

  if (dialectHandling == ULDN_DIALECT_NAMES) {
    char buffer[ULOC_FULLNAME_CAPACITY];
    do { // loop construct is so we can break early out of search
      // Each successful lookup clears the flags of the subtags it covered so
      // that they are not appended again below.
      if (hasScript && hasCountry) {
        ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, "_", country, (char *)0);
        localeIdName(buffer, resultName);
        if (!resultName.isBogus()) {
          hasScript = FALSE;
          hasCountry = FALSE;
          break;
        }
      }
      if (hasScript) {
        ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, (char *)0);
        localeIdName(buffer, resultName);
        if (!resultName.isBogus()) {
          hasScript = FALSE;
          break;
        }
      }
      if (hasCountry) {
        ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", country, (char*)0);
        localeIdName(buffer, resultName);
        if (!resultName.isBogus()) {
          hasCountry = FALSE;
          break;
        }
      }
    } while (FALSE);
  }
  if (resultName.isBogus() || resultName.isEmpty()) {
    localeIdName(lang, resultName);
  }

  UnicodeString resultRemainder;
  UnicodeString temp;
  StringEnumeration *e = NULL;
  UErrorCode status = U_ZERO_ERROR;

  if (hasScript) {
    resultRemainder.append(scriptDisplayName(script, temp));
  }
  if (hasCountry) {
    appendWithSep(resultRemainder, regionDisplayName(country, temp));
  }
  if (hasVariant) {
    appendWithSep(resultRemainder, variantDisplayName(variant, temp));
  }

  e = locale.createKeywords(status);
  if (e && U_SUCCESS(status)) {
    UnicodeString temp2;
    char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY
    const char* key;
    while ((key = e->next((int32_t *)0, status)) != NULL) {
      value[0] = 0;
      locale.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
      if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
        // The value did not fit into the buffer, so it is not a usable
        // NUL-terminated string. Stop processing keywords rather than read
        // past the end of `value`; the names gathered so far are still used.
        break;
      }
      keyDisplayName(key, temp);
      keyValueDisplayName(key, value, temp2);
      if (temp2 != UnicodeString(value, -1, US_INV)) {
        // The value has a display name of its own; that is sufficient.
        appendWithSep(resultRemainder, temp2);
      } else if (temp != UnicodeString(key, -1, US_INV)) {
        // Only the key has a display name: combine it with the raw value
        // through the key/type pattern.
        UnicodeString temp3;
        Formattable data[] = {
          temp,
          temp2
        };
        FieldPosition fpos;
        status = U_ZERO_ERROR;
        keyTypeFormat->format(data, 2, temp3, fpos, status);
        appendWithSep(resultRemainder, temp3);
      } else {
        // Neither has a display name: fall back to "key=value".
        appendWithSep(resultRemainder, temp)
          .append((UChar)0x3d /* = */)
          .append(temp2);
      }
    }
  }
  // Release the enumeration on every path, including the one where
  // createKeywords() handed back an object together with a failure status.
  delete e;

  if (!resultRemainder.isEmpty()) {
    Formattable data[] = {
      resultName,
      resultRemainder
    };
    FieldPosition fpos;
    status = U_ZERO_ERROR;
    format->format(data, 2, result, fpos, status);
    return result;
  }

  return result = resultName;
}