/**
 * Canonicalizes localeID into buffer with uloc_canonicalize() and tells the
 * caller whether the outcome can be used.
 *
 * A result that overflowed the buffer (U_BUFFER_OVERFLOW_ERROR) or that left
 * no room for the terminator (U_STRING_NOT_TERMINATED_WARNING) is rewritten
 * to U_ILLEGAL_ARGUMENT_ERROR.
 *
 * @param localeID        locale ID handed to uloc_canonicalize()
 * @param buffer          destination for the canonical ID
 * @param bufferCapacity  capacity of buffer
 * @param err             in/out ICU status
 * @return TRUE when *err is not a failure afterwards, FALSE otherwise
 */
static UBool do_canonicalize(const char* localeID, char* buffer, int32_t bufferCapacity, UErrorCode* err)
{
    uloc_canonicalize(localeID, buffer, bufferCapacity, err);

    switch (*err) {
    case U_STRING_NOT_TERMINATED_WARNING:
    case U_BUFFER_OVERFLOW_ERROR:
        /* The ID did not fit cleanly: report it as a bad argument. */
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    default:
        break;
    }

    if (U_FAILURE(*err)) {
        return FALSE;
    }
    return TRUE;
}
/**
 * Finds the position of the culture called Name inside AllCultures.
 *
 * With UE_ENABLE_ICU the name is first canonicalized through
 * uloc_canonicalize() and the canonical spelling is what gets compared
 * against each culture's GetName(); without ICU the name is compared as given.
 *
 * @param Name	Culture name to look up. Must be shorter than 64 characters
 *				when ICU is enabled (enforced with check()).
 * @return		Index into AllCultures, or -1 when no culture matches or the
 *				name could not be canonicalized.
 */
int32 FInternationalization::GetCultureIndex(const FString& Name)
{
#if UE_ENABLE_ICU
	static const int32 MaximumNameLength = 64;
	const int32 NameLength = Name.Len();
	check(NameLength < MaximumNameLength);

	// Zero-initialized so the buffer is always a valid C string.
	char CanonicalName[MaximumNameLength] = { 0 };
	UErrorCode ICUStatus = U_ZERO_ERROR;
	uloc_canonicalize(TCHAR_TO_ANSI( *Name ), CanonicalName, MaximumNameLength, &ICUStatus);

	// The canonical form may be longer than the input. If ICU reports a failure
	// or could not terminate the result, the buffer must not be compared as a string.
	if (U_FAILURE(ICUStatus) || ICUStatus == U_STRING_NOT_TERMINATED_WARNING)
	{
		return -1;
	}
#endif

	const int32 CultureCount = AllCultures.Num();
	for (int32 i = 0; i < CultureCount; ++i)
	{
#if UE_ENABLE_ICU
		if( AllCultures[i]->GetName() == CanonicalName )
#else
		if( AllCultures[i]->GetName() == Name )
#endif
		{
			return i;
		}
	}

	// No culture carries that name.
	return -1;
}
U_CDECL_END

U_NAMESPACE_BEGIN

/**
 * Makes the locale named by id the current default locale and returns it.
 *
 * Locale objects are kept in gDefaultLocalesHashT keyed by their name, so a
 * name that was installed before is reused instead of being rebuilt.
 *
 * @param id      Locale ID to install. NULL means "take the ID from the host
 *                system"; that ID is always canonicalized. (Different from
 *                most other locale APIs, where a null name means use the
 *                current ICU default locale.)
 * @param status  In/out ICU status. On any failure the previous default is
 *                returned unchanged.
 * @return The default locale after the call.
 */
Locale *locale_set_default_internal(const char *id, UErrorCode& status)
{
    // Synchronize this entire function.
    Mutex lock(&gDefaultLocaleMutex);

    char localeNameBuf[512];
    const int32_t lastIndex = (int32_t)(sizeof(localeNameBuf) - 1);

    if (id == NULL) {
        // Host-provided IDs are always canonicalized.
        id = uprv_getDefaultLocaleID();   // This function not thread safe? TODO: verify.
        uloc_canonicalize(id, localeNameBuf, lastIndex, &status);
    } else {
        uloc_getName(id, localeNameBuf, lastIndex, &status);
    }

    // Force null termination in event of a long name filling the buffer.
    // (long names are truncated.)
    localeNameBuf[lastIndex] = 0;

    if (U_FAILURE(status)) {
        return gDefaultLocale;
    }

    // Lazily create the name -> Locale cache and hook up its cleanup.
    if (gDefaultLocalesHashT == NULL) {
        gDefaultLocalesHashT = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
        if (U_FAILURE(status)) {
            return gDefaultLocale;
        }
        uhash_setValueDeleter(gDefaultLocalesHashT, deleteLocale);
        ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
    }

    Locale *cached = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf);
    if (cached != NULL) {
        gDefaultLocale = cached;
        return gDefaultLocale;
    }

    // First time this name is seen: build the Locale and remember it.
    Locale *created = new Locale(Locale::eBOGUS);
    if (created == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return gDefaultLocale;
    }
    created->init(localeNameBuf, FALSE);

    // The key is the name owned by the Locale itself.
    uhash_put(gDefaultLocalesHashT, (char*) created->getName(), created, &status);
    if (U_FAILURE(status)) {
        return gDefaultLocale;
    }

    gDefaultLocale = created;
    return gDefaultLocale;
}
/**
 * Opens and then closes the collation resources that a short-string collator
 * definition refers to.
 *
 * The definition is parsed into a CollatorSpec, its locale is canonicalized,
 * and the "collations" table of that locale is opened. The collation element
 * is selected by the locale's "collation" keyword, or by the table's
 * "default" entry when the keyword is absent.
 *
 * @param definition  Short-string collator definition.
 * @param (unnamed)   UBool parameter, not used by this function.
 * @param parseError  Receives parse error details; may be NULL.
 * @param status      In/out ICU status. Set to U_INTERNAL_PROGRAM_ERROR when
 *                    the default collation key cannot be obtained or does not
 *                    fit the local key buffer.
 */
U_CAPI void U_EXPORT2
ucol_prepareShortStringOpen( const char *definition,
                          UBool,
                          UParseError *parseError,
                          UErrorCode *status)
{
    if(U_FAILURE(*status)) return;

    UParseError internalParseError;

    if(!parseError) {
        parseError = &internalParseError;
    }
    parseError->line = 0;
    parseError->offset = 0;
    parseError->preContext[0] = 0;
    parseError->postContext[0] = 0;

    // first we want to pick stuff out of short string.
    // we'll end up with an UCA version, locale and a bunch of
    // settings

    // analyse the string in order to get everything we need.
    CollatorSpec s;
    ucol_sit_initCollatorSpecs(&s);
    ucol_sit_readSpecs(&s, definition, parseError, status);
    ucol_sit_calculateWholeLocale(&s);

    char buffer[internalBufferSize];
    uprv_memset(buffer, 0, internalBufferSize);
    uloc_canonicalize(s.locale, buffer, internalBufferSize, status);

    UResourceBundle *b = ures_open(U_ICUDATA_COLL, buffer, status);
    /* we try to find stuff from keyword */
    UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status);
    UResourceBundle *collElem = NULL;
    char keyBuffer[256];
    UBool haveKey = TRUE;

    // if there is a keyword, we pick it up and try to get elements
    if(!uloc_getKeywordValue(buffer, "collation", keyBuffer, (int32_t)sizeof(keyBuffer), status)) {
        // no keyword. we try to find the default setting, which will give us the keyword value
        UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, status);
        if(U_SUCCESS(*status)) {
            int32_t defaultKeyLen = 0;
            const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, status);
            if(defaultKeyLen >= (int32_t)sizeof(keyBuffer)) {
                // The key plus its terminator would not fit in keyBuffer.
                *status = U_INTERNAL_PROGRAM_ERROR;
                haveKey = FALSE;
            } else {
                u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen);
                keyBuffer[defaultKeyLen] = 0;
            }
        } else {
            *status = U_INTERNAL_PROGRAM_ERROR;
            haveKey = FALSE;
        }
        ures_close(defaultColl);
    }

    if(haveKey) {
        collElem = ures_getByKeyWithFallback(collations, keyBuffer, collElem, status);
        ures_close(collElem);
    }

    // Always release the bundles, including on the error paths above.
    ures_close(collations);
    ures_close(b);
}
/**
 * Returns the ICU canonical spelling of a culture name.
 *
 * @param Name	Culture name to canonicalize. Must be shorter than 64
 *				characters (enforced with check()).
 * @return		The name produced by uloc_canonicalize(), or Name unchanged
 *				when ICU reports a failure or the canonical form does not fit
 *				the local buffer.
 */
FString FCulture::FICUCultureImplementation::GetCanonicalName(const FString& Name)
{
	static const int32 MaximumNameLength = 64;
	const int32 NameLength = Name.Len();
	check(NameLength < MaximumNameLength);

	// Zero-initialized so the buffer is always a valid C string.
	char CanonicalName[MaximumNameLength] = { 0 };
	UErrorCode ICUStatus = U_ZERO_ERROR;
	uloc_canonicalize(TCHAR_TO_ANSI( *Name ), CanonicalName, MaximumNameLength, &ICUStatus);

	// The canonical form can be longer than the input. Never build a string
	// from a buffer ICU failed to fill or could not terminate.
	if (U_FAILURE(ICUStatus) || ICUStatus == U_STRING_NOT_TERMINATED_WARNING)
	{
		return Name;
	}

	return CanonicalName;
}
/**
 * Converts a UTF-16 locale name to an ICU locale ID.
 *
 * @param localeName              NUL-terminated UTF-16 locale name; only 7-bit
 *                                characters are accepted.
 * @param localeNameResult        Receives the resulting locale ID.
 * @param localeNameResultLength  Capacity of localeNameResult.
 * @param canonicalize            true to use uloc_canonicalize(), false to use
 *                                uloc_getName().
 * @param err                     In/out ICU status. Set to
 *                                U_ILLEGAL_ARGUMENT_ERROR for a non-ASCII
 *                                character or an unusable language part.
 * @return The length reported by ICU, or ULOC_FULLNAME_CAPACITY when a
 *         non-ASCII character is rejected.
 */
int32_t GetLocale(
    const UChar* localeName, char* localeNameResult, int32_t localeNameResultLength, bool canonicalize, UErrorCode* err)
{
    char narrowName[ULOC_FULLNAME_CAPACITY] = {0};

    // Convert ourselves instead of doing u_UCharsToChars as that function considers '@' a variant and stops.
    int index = 0;
    while (index < ULOC_FULLNAME_CAPACITY - 1)
    {
        const UChar unit = localeName[index];

        if (unit > (UChar)0x7F)
        {
            *err = U_ILLEGAL_ARGUMENT_ERROR;
            return ULOC_FULLNAME_CAPACITY;
        }

        narrowName[index] = (char)unit;

        if (unit == (UChar)0x0)
        {
            break;
        }

        ++index;
    }

    const int32_t localeLength = canonicalize
        ? uloc_canonicalize(narrowName, localeNameResult, localeNameResultLength, err)
        : uloc_getName(narrowName, localeNameResult, localeNameResultLength, err);

    if (!U_SUCCESS(*err))
    {
        return localeLength;
    }

    // Make sure the "language" part of the locale is reasonable (i.e. we can fetch it and it is within range).
    // This mimics how the C++ ICU API determines if a locale is "bogus" or not.
    char language[ULOC_LANG_CAPACITY];
    uloc_getLanguage(narrowName, language, ULOC_LANG_CAPACITY, err);

    // ULOC_LANG_CAPACITY includes the null terminator, so if we couldn't extract the language with the null
    // terminator, the language must be invalid.
    if (*err == U_STRING_NOT_TERMINATED_WARNING)
    {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
    }

    return localeLength;
}
/**
 * Looks up one entry of a locale's "layout" table and maps it to a
 * ULayoutType.
 *
 * @param localeId  Locale ID; canonicalized before the lookup.
 * @param key       Key inside the "layout" table.
 * @param status    In/out ICU status. Set to U_INTERNAL_PROGRAM_ERROR when the
 *                  stored value does not start with 'b', 'l', 'r' or 't'.
 * @return The layout for the value's first character, or ULOC_LAYOUT_UNKNOWN
 *         on failure or when the value is empty.
 */
static ULayoutType _uloc_getOrientationHelper(const char* localeId, const char* key, UErrorCode *status)
{
    char canonicalId[ULOC_FULLNAME_CAPACITY];
    const UChar* layoutValue;
    int32_t valueLength = 0;

    if (U_FAILURE(*status)) {
        return ULOC_LAYOUT_UNKNOWN;
    }

    uloc_canonicalize(localeId, canonicalId, sizeof(canonicalId), status);
    if (U_FAILURE(*status)) {
        return ULOC_LAYOUT_UNKNOWN;
    }

    layoutValue = uloc_getTableStringWithFallback(
        NULL, canonicalId, "layout", NULL, key, &valueLength, status);
    if (U_FAILURE(*status) || valueLength == 0) {
        return ULOC_LAYOUT_UNKNOWN;
    }

    /* Only the first character of the value decides the layout. */
    switch (layoutValue[0]) {
    case 0x0062: /* 'b' */
        return ULOC_LAYOUT_BTT;
    case 0x006C: /* 'l' */
        return ULOC_LAYOUT_LTR;
    case 0x0072: /* 'r' */
        return ULOC_LAYOUT_RTL;
    case 0x0074: /* 't' */
        return ULOC_LAYOUT_TTB;
    default:
        *status = U_INTERNAL_PROGRAM_ERROR;
        return ULOC_LAYOUT_UNKNOWN;
    }
}
/**
 * Canonicalizes a locale ID into a freshly allocated, NUL-terminated string.
 *
 * The buffer starts at 128 bytes (plus terminator) and is regrown to the
 * length ICU reports for as long as ICU answers U_BUFFER_OVERFLOW_ERROR.
 *
 * @param target      Receives the erealloc()'d buffer.
 * @param target_len  Receives the length returned by uloc_canonicalize().
 * @param locale      Locale ID to canonicalize.
 * @param status      Receives the status of the last uloc_canonicalize() call.
 */
static void php_canonicalize_locale_id(char **target, int32_t *target_len, char *locale, UErrorCode *status)
{
	char *buf = NULL;
	int32_t len = 128;

	do {
		*status = U_ZERO_ERROR;
		/* One extra byte so the terminator below always has room. */
		buf = erealloc(buf, len + 1);
		len = uloc_canonicalize(locale, buf, len, status);
	} while (*status == U_BUFFER_OVERFLOW_ERROR);

	buf[len] = 0;

	*target = buf;
	*target_len = len;
}
/* {{{ * Gets the value from ICU * common code shared by get_primary_language,get_script or get_region or get_variant * result = 0 if error, 1 if successful , -1 if no value */ static char* get_icu_value_internal( char* loc_name , char* tag_name, int* result , int fromParseLocale) { char* tag_value = NULL; int32_t tag_value_len = 512; int singletonPos = 0; char* mod_loc_name = NULL; int grOffset = 0; int32_t buflen = 512; UErrorCode status = U_ZERO_ERROR; if( tag_name != LOC_CANONICALIZE_TAG ){ /* Handle grandfathered languages */ grOffset = findOffset( LOC_GRANDFATHERED , loc_name ); if( grOffset >= 0 ){ if( strcmp(tag_name , LOC_LANG_TAG)==0 ){ tag_value = estrdup(loc_name); return tag_value; } else { /* Since Grandfathered , no value , do nothing , retutn NULL */ return NULL; } } if( fromParseLocale==1 ){ /* Handle singletons */ if( strcmp(tag_name , LOC_LANG_TAG)==0 ){ if( strlen(loc_name)>1 && (isIDPrefix(loc_name) ==1 ) ){ return loc_name; } } singletonPos = getSingletonPos( loc_name ); if( singletonPos == 0){ /* singleton at start of script, region , variant etc. 
* or invalid singleton at start of language */ return NULL; } else if( singletonPos > 0 ){ /* singleton at some position except at start * strip off the singleton and rest of the loc_name */ mod_loc_name = estrndup ( loc_name , singletonPos-1); } } /* end of if fromParse */ } /* end of if != LOC_CANONICAL_TAG */ if( mod_loc_name == NULL){ mod_loc_name = estrdup(loc_name ); } /* Proceed to ICU */ do{ tag_value = erealloc( tag_value , buflen ); tag_value_len = buflen; if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){ buflen = uloc_getScript ( mod_loc_name ,tag_value , tag_value_len , &status); } if( strcmp(tag_name , LOC_LANG_TAG )==0 ){ buflen = uloc_getLanguage ( mod_loc_name ,tag_value , tag_value_len , &status); } if( strcmp(tag_name , LOC_REGION_TAG)==0 ){ buflen = uloc_getCountry ( mod_loc_name ,tag_value , tag_value_len , &status); } if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){ buflen = uloc_getVariant ( mod_loc_name ,tag_value , tag_value_len , &status); } if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){ buflen = uloc_canonicalize ( mod_loc_name ,tag_value , tag_value_len , &status); } if( U_FAILURE( status ) ) { if( status == U_BUFFER_OVERFLOW_ERROR ) { status = U_ZERO_ERROR; continue; } /* Error in retriving data */ *result = 0; if( tag_value ){ efree( tag_value ); } if( mod_loc_name ){ efree( mod_loc_name); } return NULL; } } while( buflen > tag_value_len ); if( buflen ==0 ){ /* No value found */ *result = -1; if( tag_value ){ efree( tag_value ); } if( mod_loc_name ){ efree( mod_loc_name); } return NULL; } else { *result = 1; } if( mod_loc_name ){ efree( mod_loc_name); } return tag_value; }
/** * Test localized currency patterns for PREEURO variants. */ static void TestCurrencyPreEuro(void) { UNumberFormat *currencyFmt; UChar *str=NULL, *res=NULL; int32_t lneed, i; UFieldPosition pos; UErrorCode status = U_ZERO_ERROR; const char* locale[]={ "ca_ES_PREEURO", "de_LU_PREEURO", "en_IE_PREEURO", "fi_FI_PREEURO", "fr_LU_PREEURO", "it_IT_PREEURO", "pt_PT_PREEURO", "de_AT_PREEURO", "el_GR_PREEURO", "es_ES_PREEURO", "fr_BE_PREEURO", "ga_IE_PREEURO", "nl_BE_PREEURO", "de_DE_PREEURO", "en_BE_PREEURO", "eu_ES_PREEURO", "fr_FR_PREEURO", "gl_ES_PREEURO", "nl_NL_PREEURO", }; const char* result[]={ "\\u20A7\\u00A02", "2\\u00A0F", "IEP\\u00A01.50", "1,50\\u00A0mk", "2\\u00A0F", "ITL\\u00A02", "1$50\\u00A0\\u200B", "\\u00F6S\\u00A01,50", "1,50\\u00A0\\u0394\\u03C1\\u03C7", "2\\u00A0\\u20A7", "1,50\\u00A0FB", "IEP\\u00A01.50", "BEF\\u00A01,50", "1,50\\u00A0DM", "1,50\\u00A0BEF", "\\u20A7\\u00A02", "1,50\\u00A0F", "2\\u00A0\\u20A7", "NLG\\u00A01,50" }; log_verbose("\nTesting the number format with different currency patterns\n"); for(i=0; i < 19; i++) { char curID[256] = {0}; uloc_canonicalize(locale[i], curID, 256, &status); if(U_FAILURE(status)){ log_data_err("Could not canonicalize %s. 
Error: %s (Are you missing data?)\n", locale[i], u_errorName(status)); continue; } currencyFmt = unum_open(UNUM_CURRENCY, NULL,0,curID,NULL, &status); if(U_FAILURE(status)){ log_data_err("Error in the construction of number format with style currency: %s (Are you missing data?)\n", myErrorName(status)); } else { lneed=0; lneed= unum_formatDouble(currencyFmt, 1.50, NULL, lneed, NULL, &status); if(status==U_BUFFER_OVERFLOW_ERROR){ status=U_ZERO_ERROR; str=(UChar*)malloc(sizeof(UChar) * (lneed+1) ); pos.field = 0; unum_formatDouble(currencyFmt, 1.50, str, lneed+1, &pos, &status); } if(U_FAILURE(status)) { log_err("Error in formatting using unum_formatDouble(.....): %s\n", myErrorName(status) ); } else { res=(UChar*)malloc(sizeof(UChar) * (strlen(result[i])+1) ); u_unescape(result[i],res,(int32_t)(strlen(result[i])+1)); if (u_strcmp(str, res) != 0){ log_err("FAIL: Expected %s Got: %s for locale: %s\n", result[i],aescstrdup(str, -1),locale[i]); } } } unum_close(currencyFmt); free(str); free(res); } }
/**
 * Opens a collator described by a short-string definition.
 *
 * The definition is parsed into a CollatorSpec, its locale is canonicalized
 * and opened with ucol_open(), and then every attribute the definition sets
 * (plus the variable top, if given) is applied to the new collator.
 *
 * @param definition     Short-string collator definition.
 * @param forceDefaults  When true, attributes named in the definition are set
 *                       even if the collator already has that value.
 * @param parseError     Receives parse error details; may be NULL. Its offset
 *                       is set when applying an attribute fails.
 * @param status         In/out ICU status.
 * @return The opened collator, or NULL on failure.
 *
 * Every return path emits the trace exit that pairs with the trace entry at
 * the top of the function.
 */
U_CAPI UCollator* U_EXPORT2
ucol_openFromShortString( const char *definition,
                          UBool forceDefaults,
                          UParseError *parseError,
                          UErrorCode *status)
{
    UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN_FROM_SHORT_STRING);
    UTRACE_DATA1(UTRACE_INFO, "short string = \"%s\"", definition);

    UCollator *result = NULL;

    if(U_FAILURE(*status)) {
        UTRACE_EXIT_PTR_STATUS(result, *status);
        return result;
    }

    UParseError internalParseError;

    if(!parseError) {
        parseError = &internalParseError;
    }
    parseError->line = 0;
    parseError->offset = 0;
    parseError->preContext[0] = 0;
    parseError->postContext[0] = 0;

    // first we want to pick stuff out of short string.
    // we'll end up with an UCA version, locale and a bunch of
    // settings

    // analyse the string in order to get everything we need.
    const char *string = definition;
    CollatorSpec s;
    ucol_sit_initCollatorSpecs(&s);
    string = ucol_sit_readSpecs(&s, definition, parseError, status);
    ucol_sit_calculateWholeLocale(&s);

    char buffer[internalBufferSize];
    uprv_memset(buffer, 0, internalBufferSize);
    uloc_canonicalize(s.locale, buffer, internalBufferSize, status);

    result = ucol_open(buffer, status);

    int32_t i = 0;
    for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
        if(s.options[i] != UCOL_DEFAULT) {
            if(forceDefaults || ucol_getAttribute(result, (UColAttribute)i, status) != s.options[i]) {
                ucol_setAttribute(result, (UColAttribute)i, s.options[i], status);
            }

            if(U_FAILURE(*status)) {
                parseError->offset = (int32_t)(string - definition);
                ucol_close(result);
                result = NULL;
                UTRACE_EXIT_PTR_STATUS(result, *status);
                return result;
            }
        }
    }

    if(s.variableTopSet) {
        if(s.variableTopString[0]) {
            ucol_setVariableTop(result, s.variableTopString, s.variableTopStringLen, status);
        } else { // we set by value, using 'B'
            ucol_restoreVariableTop(result, s.variableTopValue, status);
        }
    }

    if(U_FAILURE(*status)) { // here it can only be a bogus value
        ucol_close(result);
        result = NULL;
    }

    UTRACE_EXIT_PTR_STATUS(result, *status);
    return result;
}
U_CDECL_END

U_NAMESPACE_BEGIN

/**
 * Copies *id into result with only the letter case of the leading part
 * normalized.
 *
 * The part that is touched ends at the first '@' or '.', or at the end of the
 * string when neither occurs. Inside that part, everything before the first
 * '_' is lowercased (ASCII A-Z) and everything from the '_' onwards is
 * uppercased (ASCII a-z). Text after the '@' or '.' is left as it is.
 *
 * @param id      String to canonicalize; NULL makes result bogus.
 * @param result  Receives the canonicalized copy.
 * @return result
 */
UnicodeString&
LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result)
{
  if (id == NULL) {
    result.setToBogus();
  } else {
    // Fix case only (no other changes) up to the first '@' or '.' or
    // end of string, whichever comes first.  In 3.0 I changed this to
    // stop at first '@' or '.'.  It used to run out to the end of
    // string.  My fix makes the tests pass but is probably
    // structurally incorrect.  See below.  [alan 3.0]

    // TODO: Doug, you might want to revise this...
    result = *id;
    int32_t i = 0;
    int32_t end = result.indexOf(AT_SIGN_CHAR);
    int32_t n = result.indexOf(PERIOD_CHAR);
    // A '.' ends the case-fixed part when it precedes the '@', and also
    // when there is no '@' at all (end is -1 in that case).
    if (n >= 0 && (end < 0 || n < end)) {
      end = n;
    }
    if (end < 0) {
      end = result.length();
    }
    n = result.indexOf(UNDERSCORE_CHAR);
    // An '_' located after the '@' or '.' belongs to the untouched tail and
    // must not extend the lowercase pass.
    if (n < 0 || n > end) {
      n = end;
    }
    // Lowercase the leading segment.
    for (; i < n; ++i) {
      UChar c = result.charAt(i);
      if (c >= 0x0041 && c <= 0x005a) {
        c += 0x20;
        result.setCharAt(i, c);
      }
    }
    // Uppercase the remainder up to the '@' / '.' / end.
    for (n = end; i < n; ++i) {
      UChar c = result.charAt(i);
      if (c >= 0x0061 && c <= 0x007a) {
        c -= 0x20;
        result.setCharAt(i, c);
      }
    }
  }
  return result;

#if 0
    // This code does a proper full level 2 canonicalization of id.
    // It's nasty to go from UChar to char to char to UChar -- but
    // that's what you have to do to use the uloc_canonicalize
    // function on UnicodeStrings.

    // I ended up doing the alternate fix (see above) not for
    // performance reasons, although performance will certainly be
    // better, but because doing a full level 2 canonicalization
    // causes some tests to fail.  [alan 3.0]

    // TODO: Doug, you might want to revisit this...
    result.setToBogus();
    if (id != 0) {
        int32_t buflen = id->length() + 8; // space for NUL
        char* buf = (char*) uprv_malloc(buflen);
        char* canon = (buf == 0) ? 0 : (char*) uprv_malloc(buflen);
        if (buf != 0 && canon != 0) {
            U_ASSERT(id->extract(0, INT32_MAX, buf, buflen) < buflen);
            UErrorCode ec = U_ZERO_ERROR;
            uloc_canonicalize(buf, canon, buflen, &ec);
            if (U_SUCCESS(ec)) {
                result = UnicodeString(canon);
            }
        }
        uprv_free(buf);
        uprv_free(canon);
    }
    return result;
#endif
}