Transliterator* AnyTransliterator::getTransliterator(UScriptCode source) const { if (source == targetScript || source == USCRIPT_INVALID_CODE) { return NULL; } Transliterator* t = (Transliterator*) uhash_iget(cache, (int32_t) source); if (t == NULL) { UErrorCode ec = U_ZERO_ERROR; UnicodeString sourceName(uscript_getName(source), -1, US_INV); UnicodeString id(sourceName); id.append(TARGET_SEP).append(target); t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); if (U_FAILURE(ec) || t == NULL) { delete t; // Try to pivot around Latin, our most common script id = sourceName; id.append(LATIN_PIVOT, -1).append(target); t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); if (U_FAILURE(ec) || t == NULL) { delete t; t = NULL; } } if (t != NULL) { uhash_iput(cache, (int32_t) source, t, &ec); } } return t; }
Transliterator* AnyTransliterator::getTransliterator(UScriptCode source) const { if (source == targetScript || source == USCRIPT_INVALID_CODE) { return NULL; } Transliterator* t = NULL; { Mutex m(NULL); t = (Transliterator*) uhash_iget(cache, (int32_t) source); } if (t == NULL) { UErrorCode ec = U_ZERO_ERROR; UnicodeString sourceName(uscript_getShortName(source), -1, US_INV); UnicodeString id(sourceName); id.append(TARGET_SEP).append(target); t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); if (U_FAILURE(ec) || t == NULL) { delete t; // Try to pivot around Latin, our most common script id = sourceName; id.append(LATIN_PIVOT, -1).append(target); t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); if (U_FAILURE(ec) || t == NULL) { delete t; t = NULL; } } if (t != NULL) { Transliterator *rt = NULL; { Mutex m(NULL); rt = static_cast<Transliterator *> (uhash_iget(cache, (int32_t) source)); if (rt == NULL) { // Common case, no race to cache this new transliterator. uhash_iput(cache, (int32_t) source, t, &ec); } else { // Race case, some other thread beat us to caching this transliterator. Transliterator *temp = rt; rt = t; // Our newly created transliterator that lost the race & now needs deleting. t = temp; // The transliterator from the cache that we will return. } } delete rt; // will be non-null only in case of races. } } return t; }
void CEToStringsMap::putStringList(uint32_t ce, StringList *stringList, UErrorCode &status) { uhash_iput(map, ce, (void *) stringList, &status); }
void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesLen, UErrorCode &status) { // Convert the user input data from UTF-8 to UChar (UTF-16) int32_t inputLen = 0; if (U_FAILURE(status)) { return; } u_strFromUTF8(NULL, 0, &inputLen, confusables, confusablesLen, &status); if (status != U_BUFFER_OVERFLOW_ERROR) { return; } status = U_ZERO_ERROR; fInput = static_cast<UChar *>(uprv_malloc((inputLen+1) * sizeof(UChar))); if (fInput == NULL) { status = U_MEMORY_ALLOCATION_ERROR; } u_strFromUTF8(fInput, inputLen+1, NULL, confusables, confusablesLen, &status); // Regular Expression to parse a line from Confusables.txt. The expression will match // any line. What was matched is determined by examining which capture groups have a match. // Capture Group 1: the source char // Capture Group 2: the replacement chars // Capture Group 3-6 the table type, SL, SA, ML, or MA // Capture Group 7: A blank or comment only line. // Capture Group 8: A syntactically invalid line. Anything that didn't match before. // Example Line from the confusables.txt source file: // "1D702 ; 006E 0329 ; SL # MATHEMATICAL ITALIC SMALL ETA ... " fParseLine = uregex_openC( "(?m)^[ \\t]*([0-9A-Fa-f]+)[ \\t]+;" // Match the source char "[ \\t]*([0-9A-Fa-f]+" // Match the replacement char(s) "(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;" // (continued) "\\s*(?:(SL)|(SA)|(ML)|(MA))" // Match the table type "[ \\t]*(?:#.*?)?$" // Match any trailing #comment "|^([ \\t]*(?:#.*?)?)$" // OR match empty lines or lines with only a #comment "|^(.*?)$", // OR match any line, which catches illegal lines. 0, NULL, &status); // Regular expression for parsing a hex number out of a space-separated list of them. // Capture group 1 gets the number, with spaces removed. fParseHexNum = uregex_openC("\\s*([0-9A-F]+)", 0, NULL, &status); // Zap any Byte Order Mark at the start of input. Changing it to a space is benign // given the syntax of the input. if (*fInput == 0xfeff) { *fInput = 0x20; } // Parse the input, one line per iteration of this loop. uregex_setText(fParseLine, fInput, inputLen, &status); while (uregex_findNext(fParseLine, &status)) { fLineNum++; if (uregex_start(fParseLine, 7, &status) >= 0) { // this was a blank or comment line. continue; } if (uregex_start(fParseLine, 8, &status) >= 0) { // input file syntax error. status = U_PARSE_ERROR; return; } // We have a good input line. Extract the key character and mapping string, and // put them into the appropriate mapping table. UChar32 keyChar = SpoofImpl::ScanHex(fInput, uregex_start(fParseLine, 1, &status), uregex_end(fParseLine, 1, &status), status); int32_t mapStringStart = uregex_start(fParseLine, 2, &status); int32_t mapStringLength = uregex_end(fParseLine, 2, &status) - mapStringStart; uregex_setText(fParseHexNum, &fInput[mapStringStart], mapStringLength, &status); UnicodeString *mapString = new UnicodeString(); if (mapString == NULL) { status = U_MEMORY_ALLOCATION_ERROR; return; } while (uregex_findNext(fParseHexNum, &status)) { UChar32 c = SpoofImpl::ScanHex(&fInput[mapStringStart], uregex_start(fParseHexNum, 1, &status), uregex_end(fParseHexNum, 1, &status), status); mapString->append(c); } U_ASSERT(mapString->length() >= 1); // Put the map (value) string into the string pool // This a little like a Java intern() - any duplicates will be eliminated. SPUString *smapString = stringPool->addString(mapString, status); // Add the UChar32 -> string mapping to the appropriate table. UHashtable *table = uregex_start(fParseLine, 3, &status) >= 0 ? fSLTable : uregex_start(fParseLine, 4, &status) >= 0 ? fSATable : uregex_start(fParseLine, 5, &status) >= 0 ? fMLTable : uregex_start(fParseLine, 6, &status) >= 0 ? fMATable : NULL; U_ASSERT(table != NULL); uhash_iput(table, keyChar, smapString, &status); fKeySet->add(keyChar); if (U_FAILURE(status)) { return; } } // Input data is now all parsed and collected. // Now create the run-time binary form of the data. // // This is done in two steps. First the data is assembled into vectors and strings, // for ease of construction, then the contents of these collections are dumped // into the actual raw-bytes data storage. // Build up the string array, and record the index of each string therein // in the (build time only) string pool. // Strings of length one are not entered into the strings array. // At the same time, build up the string lengths table, which records the // position in the string table of the first string of each length >= 4. // (Strings in the table are sorted by length) stringPool->sort(status); fStringTable = new UnicodeString(); fStringLengthsTable = new UVector(status); int32_t previousStringLength = 0; int32_t previousStringIndex = 0; int32_t poolSize = stringPool->size(); int32_t i; for (i=0; i<poolSize; i++) { SPUString *s = stringPool->getByIndex(i); int32_t strLen = s->fStr->length(); int32_t strIndex = fStringTable->length(); U_ASSERT(strLen >= previousStringLength); if (strLen == 1) { // strings of length one do not get an entry in the string table. // Keep the single string character itself here, which is the same // convention that is used in the final run-time string table index. s->fStrTableIndex = s->fStr->charAt(0); } else { if ((strLen > previousStringLength) && (previousStringLength >= 4)) { fStringLengthsTable->addElement(previousStringIndex, status); fStringLengthsTable->addElement(previousStringLength, status); } s->fStrTableIndex = strIndex; fStringTable->append(*(s->fStr)); } previousStringLength = strLen; previousStringIndex = strIndex; } // Make the final entry to the string lengths table. // (it holds an entry for the _last_ string of each length, so adding the // final one doesn't happen in the main loop because no longer string was encountered.) if (previousStringLength >= 4) { fStringLengthsTable->addElement(previousStringIndex, status); fStringLengthsTable->addElement(previousStringLength, status); } // Construct the compile-time Key and Value tables // // For each key code point, check which mapping tables it applies to, // and create the final data for the key & value structures. // // The four logical mapping tables are conflated into one combined table. // If multiple logical tables have the same mapping for some key, they // share a single entry in the combined table. // If more than one mapping exists for the same key code point, multiple // entries will be created in the table for (int32_t range=0; range<fKeySet->getRangeCount(); range++) { // It is an oddity of the UnicodeSet API that simply enumerating the contained // code points requires a nested loop. for (UChar32 keyChar=fKeySet->getRangeStart(range); keyChar <= fKeySet->getRangeEnd(range); keyChar++) { addKeyEntry(keyChar, fSLTable, USPOOF_SL_TABLE_FLAG, status); addKeyEntry(keyChar, fSATable, USPOOF_SA_TABLE_FLAG, status); addKeyEntry(keyChar, fMLTable, USPOOF_ML_TABLE_FLAG, status); addKeyEntry(keyChar, fMATable, USPOOF_MA_TABLE_FLAG, status); } } // Put the assembled data into the flat runtime array outputData(status); // All of the intermediate allocated data belongs to the ConfusabledataBuilder // object (this), and is deleted in the destructor. return; }
/* * Initializes the region data from the ICU resource bundles. The region data * contains the basic relationships such as which regions are known, what the numeric * codes are, any known aliases, and the territory containment data. * * If the region data has already loaded, then this method simply returns without doing * anything meaningful. */ void Region::loadRegionData() { if (regionDataIsLoaded) { return; } umtx_lock(&gRegionDataLock); if (regionDataIsLoaded) { // In case another thread gets to it before we do... umtx_unlock(&gRegionDataLock); return; } UErrorCode status = U_ZERO_ERROR; UResourceBundle* regionCodes = NULL; UResourceBundle* territoryAlias = NULL; UResourceBundle* codeMappings = NULL; UResourceBundle* worldContainment = NULL; UResourceBundle* territoryContainment = NULL; UResourceBundle* groupingContainment = NULL; DecimalFormat *df = new DecimalFormat(status); df->setParseIntegerOnly(TRUE); regionIDMap = uhash_open(uhash_hashUnicodeString,uhash_compareUnicodeString,NULL,&status); uhash_setValueDeleter(regionIDMap, deleteRegion); numericCodeMap = uhash_open(uhash_hashLong,uhash_compareLong,NULL,&status); regionAliases = uhash_open(uhash_hashUnicodeString,uhash_compareUnicodeString,NULL,&status); uhash_setKeyDeleter(regionAliases,uprv_deleteUObject); UResourceBundle *rb = ures_openDirect(NULL,"metadata",&status); regionCodes = ures_getByKey(rb,"regionCodes",NULL,&status); territoryAlias = ures_getByKey(rb,"territoryAlias",NULL,&status); UResourceBundle *rb2 = ures_openDirect(NULL,"supplementalData",&status); codeMappings = ures_getByKey(rb2,"codeMappings",NULL,&status); territoryContainment = ures_getByKey(rb2,"territoryContainment",NULL,&status); worldContainment = ures_getByKey(territoryContainment,"001",NULL,&status); groupingContainment = ures_getByKey(territoryContainment,"grouping",NULL,&status); UVector *continents = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); while ( ures_hasNext(worldContainment) ) { UnicodeString *continentName = new UnicodeString(ures_getNextUnicodeString(worldContainment,NULL,&status)); continents->addElement(continentName,status); } UVector *groupings = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); while ( ures_hasNext(groupingContainment) ) { UnicodeString *groupingName = new UnicodeString(ures_getNextUnicodeString(groupingContainment,NULL,&status)); groupings->addElement(groupingName,status); } while ( ures_hasNext(regionCodes) ) { UnicodeString regionID = ures_getNextUnicodeString(regionCodes,NULL,&status); Region *r = new Region(); r->idStr = regionID; r->idStr.extract(0,r->idStr.length(),r->id,sizeof(r->id),US_INV); r->type = URGN_TERRITORY; // Only temporary - figure out the real type later once the aliases are known. uhash_put(regionIDMap,(void *)&(r->idStr),(void *)r,&status); Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(r->idStr,result,ps); if ( U_SUCCESS(ps) ) { r->code = result.getLong(); // Convert string to number uhash_iput(numericCodeMap,r->code,(void *)r,&status); r->type = URGN_SUBCONTINENT; } else { r->code = Region::UNDEFINED_NUMERIC_CODE; } } // Process the territory aliases while ( ures_hasNext(territoryAlias) ) { UResourceBundle *res = ures_getNextResource(territoryAlias,NULL,&status); const char *aliasFrom = ures_getKey(res); UnicodeString* aliasFromStr = new UnicodeString(aliasFrom); UnicodeString aliasTo = ures_getUnicodeString(res,&status); ures_close(res); Region *aliasToRegion = (Region *) uhash_get(regionIDMap,&aliasTo); Region *aliasFromRegion = (Region *)uhash_get(regionIDMap,aliasFromStr); if ( aliasToRegion != NULL && aliasFromRegion == NULL ) { // This is just an alias from some string to a region uhash_put(regionAliases,(void *)aliasFromStr, (void *)aliasToRegion,&status); } else { if ( aliasFromRegion == NULL ) { // Deprecated region code not in the master codes list - so need to create a deprecated region for it. aliasFromRegion = new Region(); aliasFromRegion->idStr.setTo(*aliasFromStr); aliasFromRegion->idStr.extract(0,aliasFromRegion->idStr.length(),aliasFromRegion->id,sizeof(aliasFromRegion->id),US_INV); uhash_put(regionIDMap,(void *)&(aliasFromRegion->idStr),(void *)aliasFromRegion,&status); Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(aliasFromRegion->idStr,result,ps); if ( U_SUCCESS(ps) ) { aliasFromRegion->code = result.getLong(); // Convert string to number uhash_iput(numericCodeMap,aliasFromRegion->code,(void *)aliasFromRegion,&status); } else { aliasFromRegion->code = Region::UNDEFINED_NUMERIC_CODE; } aliasFromRegion->type = URGN_DEPRECATED; } else { aliasFromRegion->type = URGN_DEPRECATED; } delete aliasFromStr; aliasFromRegion->preferredValues = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); UnicodeString currentRegion; currentRegion.remove(); for (int32_t i = 0 ; i < aliasTo.length() ; i++ ) { if ( aliasTo.charAt(i) != 0x0020 ) { currentRegion.append(aliasTo.charAt(i)); } if ( aliasTo.charAt(i) == 0x0020 || i+1 == aliasTo.length() ) { Region *target = (Region *)uhash_get(regionIDMap,(void *)¤tRegion); if (target) { UnicodeString *preferredValue = new UnicodeString(target->idStr); aliasFromRegion->preferredValues->addElement((void *)preferredValue,status); } currentRegion.remove(); } } } } // Process the code mappings - This will allow us to assign numeric codes to most of the territories. while ( ures_hasNext(codeMappings) ) { UResourceBundle *mapping = ures_getNextResource(codeMappings,NULL,&status); if ( ures_getType(mapping) == URES_ARRAY && ures_getSize(mapping) == 3) { UnicodeString codeMappingID = ures_getUnicodeStringByIndex(mapping,0,&status); UnicodeString codeMappingNumber = ures_getUnicodeStringByIndex(mapping,1,&status); UnicodeString codeMapping3Letter = ures_getUnicodeStringByIndex(mapping,2,&status); Region *r = (Region *)uhash_get(regionIDMap,(void *)&codeMappingID); if ( r ) { Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(codeMappingNumber,result,ps); if ( U_SUCCESS(ps) ) { r->code = result.getLong(); // Convert string to number uhash_iput(numericCodeMap,r->code,(void *)r,&status); } UnicodeString *code3 = new UnicodeString(codeMapping3Letter); uhash_put(regionAliases,(void *)code3, (void *)r,&status); } } ures_close(mapping); } // Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS Region *r; r = (Region *) uhash_get(regionIDMap,(void *)&WORLD_ID); if ( r ) { r->type = URGN_WORLD; } r = (Region *) uhash_get(regionIDMap,(void *)&UNKNOWN_REGION_ID); if ( r ) { r->type = URGN_UNKNOWN; } for ( int32_t i = 0 ; i < continents->size() ; i++ ) { r = (Region *) uhash_get(regionIDMap,(void *)continents->elementAt(i)); if ( r ) { r->type = URGN_CONTINENT; } } delete continents; for ( int32_t i = 0 ; i < groupings->size() ; i++ ) { r = (Region *) uhash_get(regionIDMap,(void *)groupings->elementAt(i)); if ( r ) { r->type = URGN_GROUPING; } } delete groupings; // Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR // even though it looks like a territory code. Need to handle it here. r = (Region *) uhash_get(regionIDMap,(void *)&OUTLYING_OCEANIA_REGION_ID); if ( r ) { r->type = URGN_SUBCONTINENT; } // Load territory containment info from the supplemental data. while ( ures_hasNext(territoryContainment) ) { UResourceBundle *mapping = ures_getNextResource(territoryContainment,NULL,&status); const char *parent = ures_getKey(mapping); UnicodeString parentStr = UnicodeString(parent); Region *parentRegion = (Region *) uhash_get(regionIDMap,(void *)&parentStr); for ( int j = 0 ; j < ures_getSize(mapping); j++ ) { UnicodeString child = ures_getUnicodeStringByIndex(mapping,j,&status); Region *childRegion = (Region *) uhash_get(regionIDMap,(void *)&child); if ( parentRegion != NULL && childRegion != NULL ) { // Add the child region to the set of regions contained by the parent if (parentRegion->containedRegions == NULL) { parentRegion->containedRegions = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); } UnicodeString *childStr = new UnicodeString(status); childStr->fastCopyFrom(childRegion->idStr); parentRegion->containedRegions->addElement((void *)childStr,status); // Set the parent region to be the containing region of the child. // Regions of type GROUPING can't be set as the parent, since another region // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent. if ( parentRegion->type != URGN_GROUPING) { childRegion->containingRegion = parentRegion; } } } ures_close(mapping); } // Create the availableRegions lists int32_t pos = -1; while ( const UHashElement* element = uhash_nextElement(regionIDMap,&pos)) { Region *ar = (Region *)element->value.pointer; if ( availableRegions[ar->type] == NULL ) { availableRegions[ar->type] = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); } UnicodeString *arString = new UnicodeString(ar->idStr); availableRegions[ar->type]->addElement((void *)arString,status); } ures_close(territoryContainment); ures_close(worldContainment); ures_close(groupingContainment); ures_close(codeMappings); ures_close(rb2); ures_close(territoryAlias); ures_close(regionCodes); ures_close(rb); delete df; ucln_i18n_registerCleanup(UCLN_I18N_REGION, region_cleanup); regionDataIsLoaded = true; umtx_unlock(&gRegionDataLock); }
NumberFormat* NumberFormat::makeInstance(const Locale& desiredLocale, EStyles style, UErrorCode& status) { if (U_FAILURE(status)) return NULL; if (style < 0 || style >= kStyleCount) { status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; } #ifdef U_WINDOWS char buffer[8]; int32_t count = desiredLocale.getKeywordValue("compat", buffer, sizeof(buffer), status); // if the locale has "@compat=host", create a host-specific NumberFormat if (count > 0 && uprv_strcmp(buffer, "host") == 0) { Win32NumberFormat *f = NULL; UBool curr = TRUE; switch (style) { case kNumberStyle: curr = FALSE; // fall-through case kCurrencyStyle: case kIsoCurrencyStyle: // do not support plural formatting here case kPluralCurrencyStyle: f = new Win32NumberFormat(desiredLocale, curr, status); if (U_SUCCESS(status)) { return f; } delete f; break; default: break; } } #endif NumberFormat* f = NULL; DecimalFormatSymbols* symbolsToAdopt = NULL; UnicodeString pattern; UResourceBundle *resource = ures_open((char *)0, desiredLocale.getName(), &status); UResourceBundle *numberPatterns = ures_getByKey(resource, DecimalFormat::fgNumberPatterns, NULL, &status); NumberingSystem *ns = NULL; UBool deleteSymbols = TRUE; UHashtable * cache = NULL; int32_t hashKey; UBool getCache = FALSE; UBool deleteNS = FALSE; if (U_FAILURE(status)) { // We don't appear to have resource data available -- use the last-resort data status = U_USING_FALLBACK_WARNING; // When the data is unavailable, and locale isn't passed in, last resort data is used. symbolsToAdopt = new DecimalFormatSymbols(status); // Creates a DecimalFormat instance with the last resort number patterns. pattern.setTo(TRUE, gLastResortNumberPatterns[style], -1); } else { // If not all the styled patterns exists for the NumberFormat in this locale, // sets the status code to failure and returns nil. if (ures_getSize(numberPatterns) < (int32_t)(sizeof(gLastResortNumberPatterns)/sizeof(gLastResortNumberPatterns[0])) -2 ) { //minus 2: ISO and plural status = U_INVALID_FORMAT_ERROR; goto cleanup; } // Loads the decimal symbols of the desired locale. symbolsToAdopt = new DecimalFormatSymbols(desiredLocale, status); int32_t patLen = 0; /* for ISOCURRENCYSTYLE and PLURALCURRENCYSTYLE, * the pattern is the same as the pattern of CURRENCYSTYLE * but by replacing the single currency sign with * double currency sign or triple currency sign. */ int styleInNumberPattern = ((style == kIsoCurrencyStyle || style == kPluralCurrencyStyle) ? kCurrencyStyle : style); const UChar *patResStr = ures_getStringByIndex(numberPatterns, (int32_t)styleInNumberPattern, &patLen, &status); // Creates the specified decimal format style of the desired locale. pattern.setTo(TRUE, patResStr, patLen); } if (U_FAILURE(status) || symbolsToAdopt == NULL) { goto cleanup; } if(style==kCurrencyStyle || style == kIsoCurrencyStyle){ const UChar* currPattern = symbolsToAdopt->getCurrencyPattern(); if(currPattern!=NULL){ pattern.setTo(currPattern, u_strlen(currPattern)); } } // Use numbering system cache hashtable UMTX_CHECK(&nscacheMutex, (UBool)(cache != NumberingSystem_cache), getCache); if (getCache) { umtx_lock(&nscacheMutex); cache = NumberingSystem_cache; umtx_unlock(&nscacheMutex); } // Check cache we got, create if non-existant status = U_ZERO_ERROR; if (cache == NULL) { cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status); if (cache == NULL || U_FAILURE(status)) { // cache not created - out of memory cache = NULL; } else { // cache created uhash_setValueDeleter(cache, deleteNumberingSystem); // set final NumberingSystem_cache value UHashtable* h = NULL; UMTX_CHECK(&nscacheMutex, (UBool)(h != NumberingSystem_cache), getCache); if (getCache) { umtx_lock(&nscacheMutex); h = NumberingSystem_cache; umtx_unlock(&nscacheMutex); } if (h == NULL) { umtx_lock(&nscacheMutex); NumberingSystem_cache = h = cache; cache = NULL; ucln_i18n_registerCleanup(UCLN_I18N_NUMFMT, numfmt_cleanup); umtx_unlock(&nscacheMutex); } if(cache != NULL) { uhash_close(cache); } cache = h; } } // Get cached numbering system if (cache != NULL) { hashKey = desiredLocale.hashCode(); umtx_lock(&nscacheMutex); ns = (NumberingSystem *)uhash_iget(cache, hashKey); if (ns == NULL) { ns = NumberingSystem::createInstance(desiredLocale,status); uhash_iput(cache, hashKey, (void*)ns, &status); } umtx_unlock(&nscacheMutex); } else { ns = NumberingSystem::createInstance(desiredLocale,status); deleteNS = TRUE; } // check results of getting a numbering system if ((ns == NULL) || (U_FAILURE(status))) { goto cleanup; } if (ns->isAlgorithmic()) { UnicodeString nsDesc; UnicodeString nsRuleSetGroup; UnicodeString nsRuleSetName; Locale nsLoc; URBNFRuleSetTag desiredRulesType = URBNF_NUMBERING_SYSTEM; nsDesc.setTo(ns->getDescription()); int32_t firstSlash = nsDesc.indexOf(gSlash); int32_t lastSlash = nsDesc.lastIndexOf(gSlash); if ( lastSlash > firstSlash ) { char nsLocID[ULOC_FULLNAME_CAPACITY]; nsDesc.extract(0,firstSlash,nsLocID,ULOC_FULLNAME_CAPACITY,US_INV); nsRuleSetGroup.setTo(nsDesc,firstSlash+1,lastSlash-firstSlash-1); nsRuleSetName.setTo(nsDesc,lastSlash+1); nsLoc = Locale::createFromName(nsLocID); UnicodeString SpelloutRules = UNICODE_STRING_SIMPLE("SpelloutRules"); if ( nsRuleSetGroup.compare(SpelloutRules) == 0 ) { desiredRulesType = URBNF_SPELLOUT; } } else { nsLoc = desiredLocale; nsRuleSetName.setTo(nsDesc); } RuleBasedNumberFormat *r = new RuleBasedNumberFormat(desiredRulesType,nsLoc,status); if (U_FAILURE(status) || r == NULL) { goto cleanup; } r->setDefaultRuleSet(nsRuleSetName,status); f = (NumberFormat *) r; } else { // replace single currency sign in the pattern with double currency sign // if the style is kIsoCurrencyStyle if (style == kIsoCurrencyStyle) { pattern.findAndReplace(gSingleCurrencySign, gDoubleCurrencySign); } f = new DecimalFormat(pattern, symbolsToAdopt, style, status); if (U_FAILURE(status) || f == NULL) { goto cleanup; } deleteSymbols = FALSE; } f->setLocaleIDs(ures_getLocaleByType(numberPatterns, ULOC_VALID_LOCALE, &status), ures_getLocaleByType(numberPatterns, ULOC_ACTUAL_LOCALE, &status)); cleanup: ures_close(numberPatterns); ures_close(resource); if (deleteNS && ns) { delete ns; } if (U_FAILURE(status)) { /* If f exists, then it will delete the symbols */ if (f==NULL) { delete symbolsToAdopt; } else { delete f; } return NULL; } if (f == NULL || symbolsToAdopt == NULL) { status = U_MEMORY_ALLOCATION_ERROR; f = NULL; } if (deleteSymbols && symbolsToAdopt != NULL) { delete symbolsToAdopt; } return f; }
NumberFormat* NumberFormat::makeInstance(const Locale& desiredLocale, UNumberFormatStyle style, UBool mustBeDecimalFormat, UErrorCode& status) { if (U_FAILURE(status)) return NULL; if (style < 0 || style >= UNUM_FORMAT_STYLE_COUNT) { status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; } // Some styles are not supported. This is a result of merging // the @draft ICU 4.2 NumberFormat::EStyles into the long-existing UNumberFormatStyle. // Ticket #8503 is for reviewing/fixing/merging the two relevant implementations: // this one and unum_open(). // The UNUM_PATTERN_ styles are not supported here // because this method does not take a pattern string. if (!isStyleSupported(style)) { status = U_UNSUPPORTED_ERROR; return NULL; } #if U_PLATFORM_USES_ONLY_WIN32_API if (!mustBeDecimalFormat) { char buffer[8]; int32_t count = desiredLocale.getKeywordValue("compat", buffer, sizeof(buffer), status); // if the locale has "@compat=host", create a host-specific NumberFormat if (U_SUCCESS(status) && count > 0 && uprv_strcmp(buffer, "host") == 0) { Win32NumberFormat *f = NULL; UBool curr = TRUE; switch (style) { case UNUM_DECIMAL: curr = FALSE; // fall-through case UNUM_CURRENCY: case UNUM_CURRENCY_ISO: // do not support plural formatting here case UNUM_CURRENCY_PLURAL: f = new Win32NumberFormat(desiredLocale, curr, status); if (U_SUCCESS(status)) { return f; } delete f; break; default: break; } } } #endif // Use numbering system cache hashtable umtx_initOnce(gNSCacheInitOnce, &nscacheInit); // Get cached numbering system LocalPointer<NumberingSystem> ownedNs; NumberingSystem *ns = NULL; if (NumberingSystem_cache != NULL) { // TODO: Bad hash key usage, see ticket #8504. int32_t hashKey = desiredLocale.hashCode(); Mutex lock(&nscacheMutex); ns = (NumberingSystem *)uhash_iget(NumberingSystem_cache, hashKey); if (ns == NULL) { ns = NumberingSystem::createInstance(desiredLocale,status); uhash_iput(NumberingSystem_cache, hashKey, (void*)ns, &status); } } else { ownedNs.adoptInstead(NumberingSystem::createInstance(desiredLocale,status)); ns = ownedNs.getAlias(); } // check results of getting a numbering system if (U_FAILURE(status)) { return NULL; } if (mustBeDecimalFormat && ns->isAlgorithmic()) { status = U_UNSUPPORTED_ERROR; return NULL; } LocalPointer<DecimalFormatSymbols> symbolsToAdopt; UnicodeString pattern; LocalUResourceBundlePointer ownedResource(ures_open(NULL, desiredLocale.getName(), &status)); if (U_FAILURE(status)) { // We don't appear to have resource data available -- use the last-resort data status = U_USING_FALLBACK_WARNING; // When the data is unavailable, and locale isn't passed in, last resort data is used. symbolsToAdopt.adoptInstead(new DecimalFormatSymbols(status)); if (symbolsToAdopt.isNull()) { status = U_MEMORY_ALLOCATION_ERROR; return NULL; } // Creates a DecimalFormat instance with the last resort number patterns. pattern.setTo(TRUE, gLastResortNumberPatterns[style], -1); } else { // Loads the decimal symbols of the desired locale. symbolsToAdopt.adoptInstead(new DecimalFormatSymbols(desiredLocale, status)); if (symbolsToAdopt.isNull()) { status = U_MEMORY_ALLOCATION_ERROR; return NULL; } UResourceBundle *resource = ownedResource.orphan(); UResourceBundle *numElements = ures_getByKeyWithFallback(resource, gNumberElements, NULL, &status); resource = ures_getByKeyWithFallback(numElements, ns->getName(), resource, &status); resource = ures_getByKeyWithFallback(resource, gPatterns, resource, &status); ownedResource.adoptInstead(resource); int32_t patLen = 0; const UChar *patResStr = ures_getStringByKeyWithFallback(resource, gFormatKeys[style], &patLen, &status); // Didn't find a pattern specific to the numbering system, so fall back to "latn" if ( status == U_MISSING_RESOURCE_ERROR && uprv_strcmp(gLatn,ns->getName())) { status = U_ZERO_ERROR; resource = ures_getByKeyWithFallback(numElements, gLatn, resource, &status); resource = ures_getByKeyWithFallback(resource, gPatterns, resource, &status); patResStr = ures_getStringByKeyWithFallback(resource, gFormatKeys[style], &patLen, &status); } ures_close(numElements); // Creates the specified decimal format style of the desired locale. pattern.setTo(TRUE, patResStr, patLen); } if (U_FAILURE(status)) { return NULL; } if(style==UNUM_CURRENCY || style == UNUM_CURRENCY_ISO){ const UChar* currPattern = symbolsToAdopt->getCurrencyPattern(); if(currPattern!=NULL){ pattern.setTo(currPattern, u_strlen(currPattern)); } } NumberFormat *f; if (ns->isAlgorithmic()) { UnicodeString nsDesc; UnicodeString nsRuleSetGroup; UnicodeString nsRuleSetName; Locale nsLoc; URBNFRuleSetTag desiredRulesType = URBNF_NUMBERING_SYSTEM; nsDesc.setTo(ns->getDescription()); int32_t firstSlash = nsDesc.indexOf(gSlash); int32_t lastSlash = nsDesc.lastIndexOf(gSlash); if ( lastSlash > firstSlash ) { CharString nsLocID; nsLocID.appendInvariantChars(nsDesc.tempSubString(0, firstSlash), status); nsRuleSetGroup.setTo(nsDesc,firstSlash+1,lastSlash-firstSlash-1); nsRuleSetName.setTo(nsDesc,lastSlash+1); nsLoc = Locale::createFromName(nsLocID.data()); UnicodeString SpelloutRules = UNICODE_STRING_SIMPLE("SpelloutRules"); if ( nsRuleSetGroup.compare(SpelloutRules) == 0 ) { desiredRulesType = URBNF_SPELLOUT; } } else { nsLoc = desiredLocale; nsRuleSetName.setTo(nsDesc); } RuleBasedNumberFormat *r = new RuleBasedNumberFormat(desiredRulesType,nsLoc,status); if (r == NULL) { status = U_MEMORY_ALLOCATION_ERROR; return NULL; } r->setDefaultRuleSet(nsRuleSetName,status); f = r; } else { // replace single currency sign in the pattern with double currency sign // if the style is UNUM_CURRENCY_ISO if (style == UNUM_CURRENCY_ISO) { pattern.findAndReplace(UnicodeString(TRUE, gSingleCurrencySign, 1), UnicodeString(TRUE, gDoubleCurrencySign, 2)); } // "new DecimalFormat()" does not adopt the symbols if its memory allocation fails. DecimalFormatSymbols *syms = symbolsToAdopt.orphan(); f = new DecimalFormat(pattern, syms, style, status); if (f == NULL) { delete syms; status = U_MEMORY_ALLOCATION_ERROR; return NULL; } } f->setLocaleIDs(ures_getLocaleByType(ownedResource.getAlias(), ULOC_VALID_LOCALE, &status), ures_getLocaleByType(ownedResource.getAlias(), ULOC_ACTUAL_LOCALE, &status)); if (U_FAILURE(status)) { delete f; return NULL; } return f; }
static void TestOtherAPI(void){ UErrorCode status = U_ZERO_ERROR; UHashtable *hash; /* Use the correct type when cast to void * */ static const UChar one[4] = {0x006F, 0x006E, 0x0065, 0}; /* L"one" */ static const UChar one2[4] = {0x006F, 0x006E, 0x0065, 0}; /* Get around compiler optimizations */ static const UChar two[4] = {0x0074, 0x0077, 0x006F, 0}; /* L"two" */ static const UChar two2[4] = {0x0074, 0x0077, 0x006F, 0}; /* L"two" */ static const UChar three[6] = {0x0074, 0x0068, 0x0072, 0x0065, 0x0065, 0}; /* L"three" */ static const UChar four[6] = {0x0066, 0x006F, 0x0075, 0x0072, 0}; /* L"four" */ static const UChar five[6] = {0x0066, 0x0069, 0x0076, 0x0065, 0}; /* L"five" */ static const UChar five2[6] = {0x0066, 0x0069, 0x0076, 0x0065, 0}; /* L"five" */ hash = uhash_open(uhash_hashUChars, uhash_compareUChars, NULL, &status); if (U_FAILURE(status)) { log_err("FAIL: uhash_open failed with %s and returned 0x%08x\n", u_errorName(status), hash); return; } if (hash == NULL) { log_err("FAIL: uhash_open returned NULL\n"); return; } log_verbose("Ok: uhash_open returned 0x%08X\n", hash); uhash_puti(hash, (void*)one, 1, &status); if(uhash_count(hash) != 1){ log_err("FAIL: uhas_count() failed. Expected: 1, Got: %d\n", uhash_count(hash)); } if(uhash_find(hash, (void*)two) != NULL){ log_err("FAIL: uhash_find failed\n"); } uhash_puti(hash, (void*)two, 2, &status); uhash_puti(hash, (void*)three, 3, &status); uhash_puti(hash, (void*)four, 4, &status); uhash_puti(hash, (void*)five, 5, &status); if(uhash_count(hash) != 5){ log_err("FAIL: uhas_count() failed. Expected: 5, Got: %d\n", uhash_count(hash)); } if(uhash_geti(hash, (void*)two2) != 2){ log_err("FAIL: uhash_geti failed\n"); } if(uhash_find(hash, (void*)two2) == NULL){ log_err("FAIL: uhash_find of \"two\" failed\n"); } if(uhash_removei(hash, (void*)five2) != 5){ log_err("FAIL: uhash_remove() failed\n"); } if(uhash_count(hash) != 4){ log_err("FAIL: uhas_count() failed. Expected: 4, Got: %d\n", uhash_count(hash)); } uhash_put(hash, (void*)one, NULL, &status); if(uhash_count(hash) != 3){ log_err("FAIL: uhash_put() with value=NULL didn't remove the key value pair\n"); } status=U_ILLEGAL_ARGUMENT_ERROR; uhash_puti(hash, (void*)one, 1, &status); if(uhash_count(hash) != 3){ log_err("FAIL: uhash_put() with value!=NULL should fail when status != U_ZERO_ERROR \n"); } status=U_ZERO_ERROR; uhash_puti(hash, (void*)one, 1, &status); if(uhash_count(hash) != 4){ log_err("FAIL: uhash_put() with value!=NULL didn't replace the key value pair\n"); } if(_compareUChars((void*)one, (void*)two) == TRUE || _compareUChars((void*)one, (void*)one) != TRUE || _compareUChars((void*)one, (void*)one2) != TRUE || _compareUChars((void*)one, NULL) == TRUE ) { log_err("FAIL: compareUChars failed\n"); } uhash_removeAll(hash); if(uhash_count(hash) != 0){ log_err("FAIL: uhas_count() failed. Expected: 0, Got: %d\n", uhash_count(hash)); } uhash_setKeyComparator(hash, uhash_compareLong); uhash_setKeyHasher(hash, uhash_hashLong); uhash_iputi(hash, 1001, 1, &status); uhash_iputi(hash, 1002, 2, &status); uhash_iputi(hash, 1003, 3, &status); if(_compareLong(1001, 1002) == TRUE || _compareLong(1001, 1001) != TRUE || _compareLong(1001, 0) == TRUE ) { log_err("FAIL: compareLong failed\n"); } /*set the resize policy to just GROW and SHRINK*/ /*how to test this??*/ uhash_setResizePolicy(hash, U_GROW_AND_SHRINK); uhash_iputi(hash, 1004, 4, &status); uhash_iputi(hash, 1005, 5, &status); uhash_iputi(hash, 1006, 6, &status); if(uhash_count(hash) != 6){ log_err("FAIL: uhash_count() failed. Expected: 6, Got: %d\n", uhash_count(hash)); } if(uhash_iremovei(hash, 1004) != 4){ log_err("FAIL: uhash_remove failed\n"); } if(uhash_iremovei(hash, 1004) != 0){ log_err("FAIL: uhash_remove failed\n"); } uhash_removeAll(hash); uhash_iput(hash, 2004, (void*)one, &status); uhash_iput(hash, 2005, (void*)two, &status); if(uhash_count(hash) != 2){ log_err("FAIL: uhash_count() failed. Expected: 2, Got: %d\n", uhash_count(hash)); } if(uhash_iremove(hash, 2004) != (void*)one){ log_err("FAIL: uhash_remove failed\n"); } if(uhash_iremove(hash, 2004) != NULL){ log_err("FAIL: uhash_remove failed\n"); } if(uhash_count(hash) != 1){ log_err("FAIL: uhash_count() failed. Expected: 1, Got: %d\n", uhash_count(hash)); } uhash_close(hash); }
void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesLen, UErrorCode &status) { // Convert the user input data from UTF-8 to UChar (UTF-16) int32_t inputLen = 0; if (U_FAILURE(status)) { return; } u_strFromUTF8(NULL, 0, &inputLen, confusables, confusablesLen, &status); if (status != U_BUFFER_OVERFLOW_ERROR) { return; } status = U_ZERO_ERROR; fInput = static_cast<UChar *>(uprv_malloc((inputLen+1) * sizeof(UChar))); if (fInput == NULL) { status = U_MEMORY_ALLOCATION_ERROR; return; } u_strFromUTF8(fInput, inputLen+1, NULL, confusables, confusablesLen, &status); // Regular Expression to parse a line from Confusables.txt. The expression will match // any line. What was matched is determined by examining which capture groups have a match. // Capture Group 1: the source char // Capture Group 2: the replacement chars // Capture Group 3-6 the table type, SL, SA, ML, or MA (deprecated) // Capture Group 7: A blank or comment only line. // Capture Group 8: A syntactically invalid line. Anything that didn't match before. // Example Line from the confusables.txt source file: // "1D702 ; 006E 0329 ; SL # MATHEMATICAL ITALIC SMALL ETA ... " UnicodeString pattern( "(?m)^[ \\t]*([0-9A-Fa-f]+)[ \\t]+;" // Match the source char "[ \\t]*([0-9A-Fa-f]+" // Match the replacement char(s) "(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;" // (continued) "\\s*(?:(SL)|(SA)|(ML)|(MA))" // Match the table type "[ \\t]*(?:#.*?)?$" // Match any trailing #comment "|^([ \\t]*(?:#.*?)?)$" // OR match empty lines or lines with only a #comment "|^(.*?)$", -1, US_INV); // OR match any line, which catches illegal lines. // TODO: Why are we using the regex C API here? C++ would just take UnicodeString... fParseLine = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status); // Regular expression for parsing a hex number out of a space-separated list of them. // Capture group 1 gets the number, with spaces removed. pattern = UNICODE_STRING_SIMPLE("\\s*([0-9A-F]+)"); fParseHexNum = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status); // Zap any Byte Order Mark at the start of input. Changing it to a space is benign // given the syntax of the input. if (*fInput == 0xfeff) { *fInput = 0x20; } // Parse the input, one line per iteration of this loop. uregex_setText(fParseLine, fInput, inputLen, &status); while (uregex_findNext(fParseLine, &status)) { fLineNum++; if (uregex_start(fParseLine, 7, &status) >= 0) { // this was a blank or comment line. continue; } if (uregex_start(fParseLine, 8, &status) >= 0) { // input file syntax error. status = U_PARSE_ERROR; return; } // We have a good input line. Extract the key character and mapping string, and // put them into the appropriate mapping table. UChar32 keyChar = SpoofImpl::ScanHex(fInput, uregex_start(fParseLine, 1, &status), uregex_end(fParseLine, 1, &status), status); int32_t mapStringStart = uregex_start(fParseLine, 2, &status); int32_t mapStringLength = uregex_end(fParseLine, 2, &status) - mapStringStart; uregex_setText(fParseHexNum, &fInput[mapStringStart], mapStringLength, &status); UnicodeString *mapString = new UnicodeString(); if (mapString == NULL) { status = U_MEMORY_ALLOCATION_ERROR; return; } while (uregex_findNext(fParseHexNum, &status)) { UChar32 c = SpoofImpl::ScanHex(&fInput[mapStringStart], uregex_start(fParseHexNum, 1, &status), uregex_end(fParseHexNum, 1, &status), status); mapString->append(c); } U_ASSERT(mapString->length() >= 1); // Put the map (value) string into the string pool // This a little like a Java intern() - any duplicates will be eliminated. SPUString *smapString = stringPool->addString(mapString, status); // Add the UChar32 -> string mapping to the table. // For Unicode 8, the SL, SA and ML tables have been discontinued. // All input data from confusables.txt is tagged MA. uhash_iput(fTable, keyChar, smapString, &status); if (U_FAILURE(status)) { return; } fKeySet->add(keyChar); } // Input data is now all parsed and collected. // Now create the run-time binary form of the data. // // This is done in two steps. First the data is assembled into vectors and strings, // for ease of construction, then the contents of these collections are dumped // into the actual raw-bytes data storage. // Build up the string array, and record the index of each string therein // in the (build time only) string pool. // Strings of length one are not entered into the strings array. // (Strings in the table are sorted by length) stringPool->sort(status); fStringTable = new UnicodeString(); int32_t poolSize = stringPool->size(); int32_t i; for (i=0; i<poolSize; i++) { SPUString *s = stringPool->getByIndex(i); int32_t strLen = s->fStr->length(); int32_t strIndex = fStringTable->length(); if (strLen == 1) { // strings of length one do not get an entry in the string table. // Keep the single string character itself here, which is the same // convention that is used in the final run-time string table index. s->fCharOrStrTableIndex = s->fStr->charAt(0); } else { s->fCharOrStrTableIndex = strIndex; fStringTable->append(*(s->fStr)); } } // Construct the compile-time Key and Value tables // // For each key code point, check which mapping tables it applies to, // and create the final data for the key & value structures. // // The four logical mapping tables are conflated into one combined table. // If multiple logical tables have the same mapping for some key, they // share a single entry in the combined table. // If more than one mapping exists for the same key code point, multiple // entries will be created in the table for (int32_t range=0; range<fKeySet->getRangeCount(); range++) { // It is an oddity of the UnicodeSet API that simply enumerating the contained // code points requires a nested loop. for (UChar32 keyChar=fKeySet->getRangeStart(range); keyChar <= fKeySet->getRangeEnd(range); keyChar++) { SPUString *targetMapping = static_cast<SPUString *>(uhash_iget(fTable, keyChar)); U_ASSERT(targetMapping != NULL); // Set an error code if trying to consume a long string. Otherwise, // codePointAndLengthToKey will abort on a U_ASSERT. if (targetMapping->fStr->length() > 256) { status = U_ILLEGAL_ARGUMENT_ERROR; return; } int32_t key = ConfusableDataUtils::codePointAndLengthToKey(keyChar, targetMapping->fStr->length()); int32_t value = targetMapping->fCharOrStrTableIndex; fKeyVec->addElement(key, status); fValueVec->addElement(value, status); } } // Put the assembled data into the flat runtime array outputData(status); // All of the intermediate allocated data belongs to the ConfusabledataBuilder // object (this), and is deleted in the destructor. return; }
/* * Initializes the region data from the ICU resource bundles. The region data * contains the basic relationships such as which regions are known, what the numeric * codes are, any known aliases, and the territory containment data. * * If the region data has already loaded, then this method simply returns without doing * anything meaningful. */ void Region::loadRegionData(UErrorCode &status) { // Construct service objs first LocalUHashtablePointer newRegionIDMap(uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status)); LocalUHashtablePointer newNumericCodeMap(uhash_open(uhash_hashLong,uhash_compareLong,NULL,&status)); LocalUHashtablePointer newRegionAliases(uhash_open(uhash_hashUnicodeString,uhash_compareUnicodeString,NULL,&status)); LocalPointer<DecimalFormat> df(new DecimalFormat(status), status); LocalPointer<UVector> continents(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); LocalPointer<UVector> groupings(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); allRegions = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); LocalUResourceBundlePointer metadata(ures_openDirect(NULL,"metadata",&status)); LocalUResourceBundlePointer metadataAlias(ures_getByKey(metadata.getAlias(),"alias",NULL,&status)); LocalUResourceBundlePointer territoryAlias(ures_getByKey(metadataAlias.getAlias(),"territory",NULL,&status)); LocalUResourceBundlePointer supplementalData(ures_openDirect(NULL,"supplementalData",&status)); LocalUResourceBundlePointer codeMappings(ures_getByKey(supplementalData.getAlias(),"codeMappings",NULL,&status)); LocalUResourceBundlePointer idValidity(ures_getByKey(supplementalData.getAlias(),"idValidity",NULL,&status)); LocalUResourceBundlePointer regionList(ures_getByKey(idValidity.getAlias(),"region",NULL,&status)); LocalUResourceBundlePointer regionRegular(ures_getByKey(regionList.getAlias(),"regular",NULL,&status)); LocalUResourceBundlePointer regionMacro(ures_getByKey(regionList.getAlias(),"macroregion",NULL,&status)); LocalUResourceBundlePointer regionUnknown(ures_getByKey(regionList.getAlias(),"unknown",NULL,&status)); LocalUResourceBundlePointer territoryContainment(ures_getByKey(supplementalData.getAlias(),"territoryContainment",NULL,&status)); LocalUResourceBundlePointer worldContainment(ures_getByKey(territoryContainment.getAlias(),"001",NULL,&status)); LocalUResourceBundlePointer groupingContainment(ures_getByKey(territoryContainment.getAlias(),"grouping",NULL,&status)); if (U_FAILURE(status)) { return; } // now, initialize df->setParseIntegerOnly(TRUE); uhash_setValueDeleter(newRegionIDMap.getAlias(), deleteRegion); // regionIDMap owns objs uhash_setKeyDeleter(newRegionAliases.getAlias(), uprv_deleteUObject); // regionAliases owns the string keys while ( ures_hasNext(regionRegular.getAlias()) ) { UnicodeString regionName = ures_getNextUnicodeString(regionRegular.getAlias(),NULL,&status); int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER); UChar buf[6]; regionName.extract(buf,6,status); if ( rangeMarkerLocation > 0 ) { UChar endRange = regionName.charAt(rangeMarkerLocation+1); buf[rangeMarkerLocation] = 0; while ( buf[rangeMarkerLocation-1] <= endRange ) { LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status); allRegions->addElement(newRegion.orphan(),status); buf[rangeMarkerLocation-1]++; } } else { LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status); allRegions->addElement(newRegion.orphan(),status); } } while ( ures_hasNext(regionMacro.getAlias()) ) { UnicodeString regionName = ures_getNextUnicodeString(regionMacro.getAlias(),NULL,&status); int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER); UChar buf[6]; regionName.extract(buf,6,status); if ( rangeMarkerLocation > 0 ) { UChar endRange = regionName.charAt(rangeMarkerLocation+1); buf[rangeMarkerLocation] = 0; while ( buf[rangeMarkerLocation-1] <= endRange ) { LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status); allRegions->addElement(newRegion.orphan(),status); buf[rangeMarkerLocation-1]++; } } else { LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status); allRegions->addElement(newRegion.orphan(),status); } } while ( ures_hasNext(regionUnknown.getAlias()) ) { LocalPointer<UnicodeString> regionName (new UnicodeString(ures_getNextUnicodeString(regionUnknown.getAlias(),NULL,&status),status)); allRegions->addElement(regionName.orphan(),status); } while ( ures_hasNext(worldContainment.getAlias()) ) { UnicodeString *continentName = new UnicodeString(ures_getNextUnicodeString(worldContainment.getAlias(),NULL,&status)); continents->addElement(continentName,status); } while ( ures_hasNext(groupingContainment.getAlias()) ) { UnicodeString *groupingName = new UnicodeString(ures_getNextUnicodeString(groupingContainment.getAlias(),NULL,&status)); groupings->addElement(groupingName,status); } for ( int32_t i = 0 ; i < allRegions->size() ; i++ ) { LocalPointer<Region> r(new Region(), status); if ( U_FAILURE(status) ) { return; } UnicodeString *regionName = (UnicodeString *)allRegions->elementAt(i); r->idStr = *regionName; r->idStr.extract(0,r->idStr.length(),r->id,sizeof(r->id),US_INV); r->type = URGN_TERRITORY; // Only temporary - figure out the real type later once the aliases are known. Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(r->idStr,result,ps); if ( U_SUCCESS(ps) ) { r->code = result.getLong(); // Convert string to number uhash_iput(newNumericCodeMap.getAlias(),r->code,(void *)(r.getAlias()),&status); r->type = URGN_SUBCONTINENT; } else { r->code = -1; } void* idStrAlias = (void*)&(r->idStr); // about to orphan 'r'. Save this off. uhash_put(newRegionIDMap.getAlias(),idStrAlias,(void *)(r.orphan()),&status); // regionIDMap takes ownership } // Process the territory aliases while ( ures_hasNext(territoryAlias.getAlias()) ) { LocalUResourceBundlePointer res(ures_getNextResource(territoryAlias.getAlias(),NULL,&status)); const char *aliasFrom = ures_getKey(res.getAlias()); LocalPointer<UnicodeString> aliasFromStr(new UnicodeString(aliasFrom, -1, US_INV), status); UnicodeString aliasTo = ures_getUnicodeStringByKey(res.getAlias(),"replacement",&status); res.adoptInstead(NULL); const Region *aliasToRegion = (Region *) uhash_get(newRegionIDMap.getAlias(),&aliasTo); Region *aliasFromRegion = (Region *)uhash_get(newRegionIDMap.getAlias(),aliasFromStr.getAlias()); if ( aliasToRegion != NULL && aliasFromRegion == NULL ) { // This is just an alias from some string to a region uhash_put(newRegionAliases.getAlias(),(void *)aliasFromStr.orphan(), (void *)aliasToRegion,&status); } else { if ( aliasFromRegion == NULL ) { // Deprecated region code not in the master codes list - so need to create a deprecated region for it. LocalPointer<Region> newRgn(new Region, status); if ( U_SUCCESS(status) ) { aliasFromRegion = newRgn.orphan(); } else { return; // error out } aliasFromRegion->idStr.setTo(*aliasFromStr); aliasFromRegion->idStr.extract(0,aliasFromRegion->idStr.length(),aliasFromRegion->id,sizeof(aliasFromRegion->id),US_INV); uhash_put(newRegionIDMap.getAlias(),(void *)&(aliasFromRegion->idStr),(void *)aliasFromRegion,&status); Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(aliasFromRegion->idStr,result,ps); if ( U_SUCCESS(ps) ) { aliasFromRegion->code = result.getLong(); // Convert string to number uhash_iput(newNumericCodeMap.getAlias(),aliasFromRegion->code,(void *)aliasFromRegion,&status); } else { aliasFromRegion->code = -1; } aliasFromRegion->type = URGN_DEPRECATED; } else { aliasFromRegion->type = URGN_DEPRECATED; } { LocalPointer<UVector> newPreferredValues(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); aliasFromRegion->preferredValues = newPreferredValues.orphan(); } if( U_FAILURE(status)) { return; } UnicodeString currentRegion; //currentRegion.remove(); TODO: was already 0 length? for (int32_t i = 0 ; i < aliasTo.length() ; i++ ) { if ( aliasTo.charAt(i) != 0x0020 ) { currentRegion.append(aliasTo.charAt(i)); } if ( aliasTo.charAt(i) == 0x0020 || i+1 == aliasTo.length() ) { Region *target = (Region *)uhash_get(newRegionIDMap.getAlias(),(void *)¤tRegion); if (target) { LocalPointer<UnicodeString> preferredValue(new UnicodeString(target->idStr), status); aliasFromRegion->preferredValues->addElement((void *)preferredValue.orphan(),status); // may add null if err } currentRegion.remove(); } } } } // Process the code mappings - This will allow us to assign numeric codes to most of the territories. while ( ures_hasNext(codeMappings.getAlias()) ) { UResourceBundle *mapping = ures_getNextResource(codeMappings.getAlias(),NULL,&status); if ( ures_getType(mapping) == URES_ARRAY && ures_getSize(mapping) == 3) { UnicodeString codeMappingID = ures_getUnicodeStringByIndex(mapping,0,&status); UnicodeString codeMappingNumber = ures_getUnicodeStringByIndex(mapping,1,&status); UnicodeString codeMapping3Letter = ures_getUnicodeStringByIndex(mapping,2,&status); Region *r = (Region *)uhash_get(newRegionIDMap.getAlias(),(void *)&codeMappingID); if ( r ) { Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(codeMappingNumber,result,ps); if ( U_SUCCESS(ps) ) { r->code = result.getLong(); // Convert string to number uhash_iput(newNumericCodeMap.getAlias(),r->code,(void *)r,&status); } LocalPointer<UnicodeString> code3(new UnicodeString(codeMapping3Letter), status); uhash_put(newRegionAliases.getAlias(),(void *)code3.orphan(), (void *)r,&status); } } ures_close(mapping); } // Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS Region *r; UnicodeString WORLD_ID_STRING(WORLD_ID); r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&WORLD_ID_STRING); if ( r ) { r->type = URGN_WORLD; } UnicodeString UNKNOWN_REGION_ID_STRING(UNKNOWN_REGION_ID); r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&UNKNOWN_REGION_ID_STRING); if ( r ) { r->type = URGN_UNKNOWN; } for ( int32_t i = 0 ; i < continents->size() ; i++ ) { r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)continents->elementAt(i)); if ( r ) { r->type = URGN_CONTINENT; } } for ( int32_t i = 0 ; i < groupings->size() ; i++ ) { r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)groupings->elementAt(i)); if ( r ) { r->type = URGN_GROUPING; } } // Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR // even though it looks like a territory code. Need to handle it here. UnicodeString OUTLYING_OCEANIA_REGION_ID_STRING(OUTLYING_OCEANIA_REGION_ID); r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&OUTLYING_OCEANIA_REGION_ID_STRING); if ( r ) { r->type = URGN_SUBCONTINENT; } // Load territory containment info from the supplemental data. while ( ures_hasNext(territoryContainment.getAlias()) ) { LocalUResourceBundlePointer mapping(ures_getNextResource(territoryContainment.getAlias(),NULL,&status)); if( U_FAILURE(status) ) { return; // error out } const char *parent = ures_getKey(mapping.getAlias()); if (uprv_strcmp(parent, "containedGroupings") == 0 || uprv_strcmp(parent, "deprecated") == 0) { continue; // handle new pseudo-parent types added in ICU data per cldrbug 7808; for now just skip. // #11232 is to do something useful with these. } UnicodeString parentStr = UnicodeString(parent, -1 , US_INV); Region *parentRegion = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&parentStr); for ( int j = 0 ; j < ures_getSize(mapping.getAlias()); j++ ) { UnicodeString child = ures_getUnicodeStringByIndex(mapping.getAlias(),j,&status); Region *childRegion = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&child); if ( parentRegion != NULL && childRegion != NULL ) { // Add the child region to the set of regions contained by the parent if (parentRegion->containedRegions == NULL) { parentRegion->containedRegions = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); } LocalPointer<UnicodeString> childStr(new UnicodeString(), status); if( U_FAILURE(status) ) { return; // error out } childStr->fastCopyFrom(childRegion->idStr); parentRegion->containedRegions->addElement((void *)childStr.orphan(),status); // Set the parent region to be the containing region of the child. // Regions of type GROUPING can't be set as the parent, since another region // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent. if ( parentRegion->type != URGN_GROUPING) { childRegion->containingRegion = parentRegion; } } } } // Create the availableRegions lists int32_t pos = UHASH_FIRST; while ( const UHashElement* element = uhash_nextElement(newRegionIDMap.getAlias(),&pos)) { Region *ar = (Region *)element->value.pointer; if ( availableRegions[ar->type] == NULL ) { LocalPointer<UVector> newAr(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); availableRegions[ar->type] = newAr.orphan(); } LocalPointer<UnicodeString> arString(new UnicodeString(ar->idStr), status); if( U_FAILURE(status) ) { return; // error out } availableRegions[ar->type]->addElement((void *)arString.orphan(),status); } ucln_i18n_registerCleanup(UCLN_I18N_REGION, region_cleanup); // copy hashtables numericCodeMap = newNumericCodeMap.orphan(); regionIDMap = newRegionIDMap.orphan(); regionAliases = newRegionAliases.orphan(); }
extern void storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length, UStringPrepType type, UErrorCode* status){ UChar* map = NULL; int16_t adjustedLen=0, i; uint16_t trieWord = 0; ValueStruct *value = NULL; uint32_t savedTrieWord = 0; /* initialize the hashtable */ if(hashTable==NULL){ hashTable = uhash_open(hashEntry, compareEntries, NULL, status); uhash_setValueDeleter(hashTable, valueDeleter); } /* figure out if the code point has type already stored */ savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL); if(savedTrieWord!=0){ if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){ /* turn on the first bit in trie word */ trieWord += 0x01; }else{ /* * the codepoint has value something other than prohibited * and a mapping .. error! */ fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint); exit(U_ILLEGAL_ARGUMENT_ERROR); } } /* figure out the real length */ for(i=0; i<length; i++){ if(mapping[i] > 0xFFFF){ adjustedLen +=2; }else{ adjustedLen++; } } if(adjustedLen == 0){ trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2); /* make sure that the value of trieWord is less than the threshold */ if(trieWord < _SPREP_TYPE_THRESHOLD){ /* now set the value in the trie */ if(!utrie_set32(sprepTrie,codepoint,trieWord)){ fprintf(stderr,"Could not set the value for code point.\n"); exit(U_ILLEGAL_ARGUMENT_ERROR); } /* value is set so just return */ return; }else{ fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD); exit(U_ILLEGAL_CHAR_FOUND); } } if(adjustedLen == 1){ /* calculate the delta */ int16_t delta = (int16_t)((int32_t)codepoint - (int16_t) mapping[0]); if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){ trieWord = delta << 2; /* make sure that the second bit is OFF */ if((trieWord & 0x02) != 0 ){ fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n"); exit(U_INTERNAL_PROGRAM_ERROR); } /* make sure that the value of trieWord is less than the threshold */ if(trieWord < _SPREP_TYPE_THRESHOLD){ /* now set the value in the trie */ if(!utrie_set32(sprepTrie,codepoint,trieWord)){ fprintf(stderr,"Could not set the value for code point.\n"); exit(U_ILLEGAL_ARGUMENT_ERROR); } /* value is set so just return */ return; } } /* * if the delta is not in the given range or if the trieWord is larger than the threshold * just fall through for storing the mapping in the mapping table */ } map = (UChar*) uprv_calloc(adjustedLen + 1, U_SIZEOF_UCHAR); i=0; while(i<length){ if(mapping[i] <= 0xFFFF){ map[i] = (uint16_t)mapping[i]; }else{ map[i] = U16_LEAD(mapping[i]); map[i+1] = U16_TRAIL(mapping[i]); } i++; } value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct)); value->mapping = map; value->type = type; value->length = adjustedLen; if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){ mappingDataCapacity++; } if(maxLength < value->length){ maxLength = value->length; } uhash_iput(hashTable,codepoint,value,status); mappingDataCapacity += adjustedLen; if(U_FAILURE(*status)){ fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status)); exit(*status); } }