コード例 #1
0
ファイル: anytrans.cpp プロジェクト: 00zhengfu00/third_party
/**
 * Returns the transliterator used for text in the given source script,
 * creating it and storing it in `cache` on first use.
 *
 * Two IDs are tried in order: "<source><TARGET_SEP><target>", and, if that
 * one cannot be instantiated, "<source><LATIN_PIVOT><target>".
 *
 * @param source  script code of the text to be transliterated
 * @return the cached or newly created transliterator, or NULL when `source`
 *         equals `targetScript`, is USCRIPT_INVALID_CODE, or neither ID
 *         could be instantiated.  The pointer is also stored in `cache`;
 *         NOTE(review): presumably the cache owns it and callers must not
 *         delete it -- confirm against the cache's value deleter.
 */
Transliterator* AnyTransliterator::getTransliterator(UScriptCode source) const {

    if (source == targetScript || source == USCRIPT_INVALID_CODE) {
        return NULL;
    }

    Transliterator* t = static_cast<Transliterator*>(uhash_iget(cache, (int32_t) source));
    if (t == NULL) {
        UErrorCode ec = U_ZERO_ERROR;
        UnicodeString sourceName(uscript_getName(source), -1, US_INV);
        UnicodeString id(sourceName);
        id.append(TARGET_SEP).append(target);

        t = Transliterator::createInstance(id, UTRANS_FORWARD, ec);
        if (U_FAILURE(ec) || t == NULL) {
            delete t;

            // Try to pivot around Latin, our most common script.
            // The error code must be cleared first: ICU functions that take
            // a UErrorCode do nothing when it already holds a failure, so
            // without the reset this second attempt could never succeed.
            ec = U_ZERO_ERROR;
            id = sourceName;
            id.append(LATIN_PIVOT, -1).append(target);
            t = Transliterator::createInstance(id, UTRANS_FORWARD, ec);
            if (U_FAILURE(ec) || t == NULL) {
                delete t;
                t = NULL;
            }
        }

        if (t != NULL) {
            // Remember the new instance so later calls for this script reuse it.
            uhash_iput(cache, (int32_t) source, t, &ec);
        }
    }

    return t;
}
コード例 #2
0
ファイル: anytrans.cpp プロジェクト: JoeDoyle23/node
/**
 * Returns the transliterator used for text in the given source script,
 * creating it and storing it in `cache` on first use.
 *
 * Cache reads and writes are done while holding Mutex(NULL); the (possibly
 * slow) creation of a new transliterator happens outside the lock, so two
 * threads may both create one.  The loser of that race deletes its own
 * instance and returns the one already in the cache.
 *
 * Two IDs are tried in order: "<source><TARGET_SEP><target>", and, if that
 * one cannot be instantiated, "<source><LATIN_PIVOT><target>".
 *
 * @param source  script code of the text to be transliterated
 * @return the cached or newly created transliterator, or NULL when `source`
 *         equals `targetScript`, is USCRIPT_INVALID_CODE, or neither ID
 *         could be instantiated.
 */
Transliterator* AnyTransliterator::getTransliterator(UScriptCode source) const {

    if (source == targetScript || source == USCRIPT_INVALID_CODE) {
        return NULL;
    }

    Transliterator* t = NULL;
    {
        Mutex m(NULL);
        t = static_cast<Transliterator *>(uhash_iget(cache, (int32_t) source));
    }
    if (t == NULL) {
        UErrorCode ec = U_ZERO_ERROR;
        UnicodeString sourceName(uscript_getShortName(source), -1, US_INV);
        UnicodeString id(sourceName);
        id.append(TARGET_SEP).append(target);

        t = Transliterator::createInstance(id, UTRANS_FORWARD, ec);
        if (U_FAILURE(ec) || t == NULL) {
            delete t;

            // Try to pivot around Latin, our most common script.
            // The error code must be cleared first: ICU functions that take
            // a UErrorCode do nothing when it already holds a failure, so
            // without the reset this second attempt could never succeed.
            ec = U_ZERO_ERROR;
            id = sourceName;
            id.append(LATIN_PIVOT, -1).append(target);
            t = Transliterator::createInstance(id, UTRANS_FORWARD, ec);
            if (U_FAILURE(ec) || t == NULL) {
                delete t;
                t = NULL;
            }
        }

        if (t != NULL) {
            Transliterator *rt = NULL;
            {
                Mutex m(NULL);
                rt = static_cast<Transliterator *> (uhash_iget(cache, (int32_t) source));
                if (rt == NULL) {
                    // Common case, no race to cache this new transliterator.
                    uhash_iput(cache, (int32_t) source, t, &ec);
                } else {
                    // Race case, some other thread beat us to caching this transliterator.
                    Transliterator *temp = rt;
                    rt = t;    // Our newly created transliterator that lost the race & now needs deleting.
                    t  = temp; // The transliterator from the cache that we will return.
                }
            }
            // Deleted outside the lock to keep the critical section short.
            delete rt;    // will be non-null only in case of races.
        }
    }
    return t;
}
コード例 #3
0
ファイル: colldata.cpp プロジェクト: icu-project/icu4c
/**
 * Stores `stringList` in `map` under the integer key `ce`.
 *
 * @param ce          key under which the list is stored
 * @param stringList  value to associate with the key
 * @param status      ICU error code, passed through to uhash_iput()
 */
void CEToStringsMap::putStringList(uint32_t ce, StringList *stringList, UErrorCode &status)
{
    // The hash table stores untyped pointers.
    void *value = static_cast<void *>(stringList);

    uhash_iput(map, ce, value, &status);
}
コード例 #4
0
ファイル: uspoof_conf.cpp プロジェクト: venkatarajasekhar/Qt
/**
 * Parses confusables source text and builds the run-time binary form of
 * the data.
 *
 * The work is done in stages: convert the UTF-8 input to UTF-16, parse it
 * one line at a time with a regular expression, collect the mappings into
 * the four (SL, SA, ML, MA) hash tables and the string pool, build the
 * string table and string-lengths table, build the key/value entries, and
 * finally call outputData().
 *
 * @param confusables     the input text, UTF-8 encoded
 * @param confusablesLen  length of `confusables`, as passed to u_strFromUTF8()
 * @param status          ICU error code.  Nothing is done if it already
 *                        indicates a failure on entry.  Set to
 *                        U_MEMORY_ALLOCATION_ERROR if a buffer cannot be
 *                        allocated and to U_PARSE_ERROR on a syntactically
 *                        invalid input line.
 */
void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesLen,
               UErrorCode &status) {

    // Convert the user input data from UTF-8 to UChar (UTF-16)
    int32_t inputLen = 0;
    if (U_FAILURE(status)) {
        return;
    }
    // Preflight call: with no destination buffer this only computes the
    // required length and is expected to report a buffer overflow.
    u_strFromUTF8(NULL, 0, &inputLen, confusables, confusablesLen, &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        return;
    }
    status = U_ZERO_ERROR;
    fInput = static_cast<UChar *>(uprv_malloc((inputLen+1) * sizeof(UChar)));
    if (fInput == NULL) {
        // Must stop here: the code below dereferences fInput.
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    u_strFromUTF8(fInput, inputLen+1, NULL, confusables, confusablesLen, &status);
    if (U_FAILURE(status)) {
        // The conversion failed, so the buffer contents cannot be relied upon.
        return;
    }


    // Regular Expression to parse a line from Confusables.txt.  The expression will match
    // any line.  What was matched is determined by examining which capture groups have a match.
    //   Capture Group 1:  the source char
    //   Capture Group 2:  the replacement chars
    //   Capture Group 3-6  the table type, SL, SA, ML, or MA
    //   Capture Group 7:  A blank or comment only line.
    //   Capture Group 8:  A syntactically invalid line.  Anything that didn't match before.
    // Example Line from the confusables.txt source file:
    //   "1D702 ;	006E 0329 ;	SL	# MATHEMATICAL ITALIC SMALL ETA ... "
    fParseLine = uregex_openC(
        "(?m)^[ \\t]*([0-9A-Fa-f]+)[ \\t]+;"      // Match the source char
        "[ \\t]*([0-9A-Fa-f]+"                    // Match the replacement char(s)
           "(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;"    //     (continued)
        "\\s*(?:(SL)|(SA)|(ML)|(MA))"             // Match the table type
        "[ \\t]*(?:#.*?)?$"                       // Match any trailing #comment
        "|^([ \\t]*(?:#.*?)?)$"       // OR match empty lines or lines with only a #comment
        "|^(.*?)$",                   // OR match any line, which catches illegal lines.
        0, NULL, &status);

    // Regular expression for parsing a hex number out of a space-separated list of them.
    //   Capture group 1 gets the number, with spaces removed.
    fParseHexNum = uregex_openC("\\s*([0-9A-F]+)", 0, NULL, &status);

    // Zap any Byte Order Mark at the start of input.  Changing it to a space is benign
    //   given the syntax of the input.
    if (*fInput == 0xfeff) {
        *fInput = 0x20;
    }

    // Parse the input, one line per iteration of this loop.
    uregex_setText(fParseLine, fInput, inputLen, &status);
    while (uregex_findNext(fParseLine, &status)) {
        fLineNum++;
        if (uregex_start(fParseLine, 7, &status) >= 0) {
            // this was a blank or comment line.
            continue;
        }
        if (uregex_start(fParseLine, 8, &status) >= 0) {
            // input file syntax error.
            status = U_PARSE_ERROR;
            return;
        }

        // We have a good input line.  Extract the key character and mapping string, and
        //    put them into the appropriate mapping table.
        UChar32 keyChar = SpoofImpl::ScanHex(fInput, uregex_start(fParseLine, 1, &status),
                          uregex_end(fParseLine, 1, &status), status);

        int32_t mapStringStart = uregex_start(fParseLine, 2, &status);
        int32_t mapStringLength = uregex_end(fParseLine, 2, &status) - mapStringStart;
        uregex_setText(fParseHexNum, &fInput[mapStringStart], mapStringLength, &status);

        UnicodeString  *mapString = new UnicodeString();
        if (mapString == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        // Each hex number in the replacement field contributes one code point.
        while (uregex_findNext(fParseHexNum, &status)) {
            UChar32 c = SpoofImpl::ScanHex(&fInput[mapStringStart], uregex_start(fParseHexNum, 1, &status),
                                 uregex_end(fParseHexNum, 1, &status), status);
            mapString->append(c);
        }
        U_ASSERT(mapString->length() >= 1);

        // Put the map (value) string into the string pool
        // This a little like a Java intern() - any duplicates will be eliminated.
        SPUString *smapString = stringPool->addString(mapString, status);

        // Add the UChar32 -> string mapping to the appropriate table.
        UHashtable *table = uregex_start(fParseLine, 3, &status) >= 0 ? fSLTable :
                            uregex_start(fParseLine, 4, &status) >= 0 ? fSATable :
                            uregex_start(fParseLine, 5, &status) >= 0 ? fMLTable :
                            uregex_start(fParseLine, 6, &status) >= 0 ? fMATable :
                            NULL;
        U_ASSERT(table != NULL);
        uhash_iput(table, keyChar, smapString, &status);
        fKeySet->add(keyChar);
        if (U_FAILURE(status)) {
            return;
        }
    }

    // Input data is now all parsed and collected.
    // Now create the run-time binary form of the data.
    //
    // This is done in two steps.  First the data is assembled into vectors and strings,
    //   for ease of construction, then the contents of these collections are dumped
    //   into the actual raw-bytes data storage.

    // Build up the string array, and record the index of each string therein
    //  in the (build time only) string pool.
    // Strings of length one are not entered into the strings array.
    // At the same time, build up the string lengths table, which records the
    // position in the string table of the first string of each length >= 4.
    // (Strings in the table are sorted by length)
    stringPool->sort(status);
    fStringTable = new UnicodeString();
    fStringLengthsTable = new UVector(status);
    if (fStringTable == NULL || fStringLengthsTable == NULL) {
        // Both objects are dereferenced below; do not mask an earlier failure.
        if (U_SUCCESS(status)) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
        return;
    }
    int32_t previousStringLength = 0;
    int32_t previousStringIndex  = 0;
    int32_t poolSize = stringPool->size();
    int32_t i;
    for (i=0; i<poolSize; i++) {
        SPUString *s = stringPool->getByIndex(i);
        int32_t strLen = s->fStr->length();
        int32_t strIndex = fStringTable->length();
        U_ASSERT(strLen >= previousStringLength);
        if (strLen == 1) {
            // strings of length one do not get an entry in the string table.
            // Keep the single string character itself here, which is the same
            //  convention that is used in the final run-time string table index.
            s->fStrTableIndex = s->fStr->charAt(0);
        } else {
            if ((strLen > previousStringLength) && (previousStringLength >= 4)) {
                fStringLengthsTable->addElement(previousStringIndex, status);
                fStringLengthsTable->addElement(previousStringLength, status);
            }
            s->fStrTableIndex = strIndex;
            fStringTable->append(*(s->fStr));
        }
        previousStringLength = strLen;
        previousStringIndex  = strIndex;
    }
    // Make the final entry to the string lengths table.
    //   (it holds an entry for the _last_ string of each length, so adding the
    //    final one doesn't happen in the main loop because no longer string was encountered.)
    if (previousStringLength >= 4) {
        fStringLengthsTable->addElement(previousStringIndex, status);
        fStringLengthsTable->addElement(previousStringLength, status);
    }

    // Construct the compile-time Key and Value tables
    //
    // For each key code point, check which mapping tables it applies to,
    //   and create the final data for the key & value structures.
    //
    //   The four logical mapping tables are conflated into one combined table.
    //   If multiple logical tables have the same mapping for some key, they
    //     share a single entry in the combined table.
    //   If more than one mapping exists for the same key code point, multiple
    //     entries will be created in the table

    for (int32_t range=0; range<fKeySet->getRangeCount(); range++) {
        // It is an oddity of the UnicodeSet API that simply enumerating the contained
        //   code points requires a nested loop.
        for (UChar32 keyChar=fKeySet->getRangeStart(range);
                keyChar <= fKeySet->getRangeEnd(range); keyChar++) {
            addKeyEntry(keyChar, fSLTable, USPOOF_SL_TABLE_FLAG, status);
            addKeyEntry(keyChar, fSATable, USPOOF_SA_TABLE_FLAG, status);
            addKeyEntry(keyChar, fMLTable, USPOOF_ML_TABLE_FLAG, status);
            addKeyEntry(keyChar, fMATable, USPOOF_MA_TABLE_FLAG, status);
        }
    }

    // Put the assembled data into the flat runtime array
    outputData(status);

    // All of the intermediate allocated data belongs to the ConfusabledataBuilder
    //  object  (this), and is deleted in the destructor.
    return;
}
コード例 #5
0
/*
 * Initializes the region data from the ICU resource bundles.  The region data
 * contains the basic relationships such as which regions are known, what the numeric
 * codes are, any known aliases, and the territory containment data.
 *
 * If the region data has already loaded, then this method simply returns without doing
 * anything meaningful.
 *
 * The loading itself runs while holding gRegionDataLock.  If the number parser
 * used to recognize numeric region codes cannot be allocated, the method
 * releases the lock and returns without marking the data as loaded.
 */
void Region::loadRegionData() {

    if (regionDataIsLoaded) {
        return;
    }

    umtx_lock(&gRegionDataLock);

    if (regionDataIsLoaded) { // In case another thread gets to it before we do...
        umtx_unlock(&gRegionDataLock);
        return;
    }


    UErrorCode status = U_ZERO_ERROR;

    UResourceBundle* regionCodes = NULL;
    UResourceBundle* territoryAlias = NULL;
    UResourceBundle* codeMappings = NULL;
    UResourceBundle* worldContainment = NULL;
    UResourceBundle* territoryContainment = NULL;
    UResourceBundle* groupingContainment = NULL;

    DecimalFormat *df = new DecimalFormat(status);
    if (df == NULL) {
        // Out of memory: nothing has been opened yet, so just release the
        // lock instead of dereferencing a null formatter below.
        umtx_unlock(&gRegionDataLock);
        return;
    }
    df->setParseIntegerOnly(TRUE);

    // regionIDMap: region ID string -> Region (values deleted with the table).
    regionIDMap = uhash_open(uhash_hashUnicodeString,uhash_compareUnicodeString,NULL,&status);
    uhash_setValueDeleter(regionIDMap, deleteRegion);

    // numericCodeMap: numeric code -> Region.
    numericCodeMap = uhash_open(uhash_hashLong,uhash_compareLong,NULL,&status);

    // regionAliases: alias string (keys deleted with the table) -> Region.
    regionAliases = uhash_open(uhash_hashUnicodeString,uhash_compareUnicodeString,NULL,&status);
    uhash_setKeyDeleter(regionAliases,uprv_deleteUObject);

    UResourceBundle *rb = ures_openDirect(NULL,"metadata",&status);
    regionCodes = ures_getByKey(rb,"regionCodes",NULL,&status);
    territoryAlias = ures_getByKey(rb,"territoryAlias",NULL,&status);

    UResourceBundle *rb2 = ures_openDirect(NULL,"supplementalData",&status);
    codeMappings = ures_getByKey(rb2,"codeMappings",NULL,&status);

    territoryContainment = ures_getByKey(rb2,"territoryContainment",NULL,&status);
    worldContainment = ures_getByKey(territoryContainment,"001",NULL,&status);
    groupingContainment = ures_getByKey(territoryContainment,"grouping",NULL,&status);

    // Names listed under "001" are remembered so they can be typed as continents later.
    UVector *continents = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);

    while ( ures_hasNext(worldContainment) ) {
        UnicodeString *continentName = new UnicodeString(ures_getNextUnicodeString(worldContainment,NULL,&status));
        continents->addElement(continentName,status);
    }

    // Names listed under "grouping" are remembered so they can be typed as groupings later.
    UVector *groupings = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
    while ( ures_hasNext(groupingContainment) ) {
        UnicodeString *groupingName = new UnicodeString(ures_getNextUnicodeString(groupingContainment,NULL,&status));
        groupings->addElement(groupingName,status);
    }

    // Create one Region per known region code.
    while ( ures_hasNext(regionCodes) ) {
        UnicodeString regionID = ures_getNextUnicodeString(regionCodes,NULL,&status);
        Region *r = new Region();
        r->idStr = regionID;
        r->idStr.extract(0,r->idStr.length(),r->id,sizeof(r->id),US_INV);
        r->type = URGN_TERRITORY; // Only temporary - figure out the real type later once the aliases are known.

        uhash_put(regionIDMap,(void *)&(r->idStr),(void *)r,&status);
        Formattable result;
        UErrorCode ps = U_ZERO_ERROR;
        df->parse(r->idStr,result,ps);
        if ( U_SUCCESS(ps) ) {
            // An ID that parses as a number is registered under that numeric code.
            r->code = result.getLong(); // Convert string to number
            uhash_iput(numericCodeMap,r->code,(void *)r,&status);
            r->type = URGN_SUBCONTINENT;
        } else {
            r->code = Region::UNDEFINED_NUMERIC_CODE;
        }
    }


    // Process the territory aliases
    while ( ures_hasNext(territoryAlias) ) {
        UResourceBundle *res = ures_getNextResource(territoryAlias,NULL,&status);
        const char *aliasFrom = ures_getKey(res);
        UnicodeString* aliasFromStr = new UnicodeString(aliasFrom);
        UnicodeString aliasTo = ures_getUnicodeString(res,&status);
        ures_close(res);

        Region *aliasToRegion = (Region *) uhash_get(regionIDMap,&aliasTo);
        Region *aliasFromRegion = (Region *)uhash_get(regionIDMap,aliasFromStr);

        if ( aliasToRegion != NULL && aliasFromRegion == NULL ) { // This is just an alias from some string to a region
            // regionAliases has a key deleter, so the table takes over aliasFromStr.
            uhash_put(regionAliases,(void *)aliasFromStr, (void *)aliasToRegion,&status);
        } else {
            if ( aliasFromRegion == NULL ) { // Deprecated region code not in the master codes list - so need to create a deprecated region for it.
                aliasFromRegion = new Region();
                aliasFromRegion->idStr.setTo(*aliasFromStr);
                aliasFromRegion->idStr.extract(0,aliasFromRegion->idStr.length(),aliasFromRegion->id,sizeof(aliasFromRegion->id),US_INV);
                uhash_put(regionIDMap,(void *)&(aliasFromRegion->idStr),(void *)aliasFromRegion,&status);
                Formattable result;
                UErrorCode ps = U_ZERO_ERROR;
                df->parse(aliasFromRegion->idStr,result,ps);
                if ( U_SUCCESS(ps) ) {
                    aliasFromRegion->code = result.getLong(); // Convert string to number
                    uhash_iput(numericCodeMap,aliasFromRegion->code,(void *)aliasFromRegion,&status);
                } else {
                    aliasFromRegion->code = Region::UNDEFINED_NUMERIC_CODE;
                }
                aliasFromRegion->type = URGN_DEPRECATED;
            } else {
                aliasFromRegion->type = URGN_DEPRECATED;
            }
            // Not stored anywhere on this path, so it is released here.
            delete aliasFromStr;

            // aliasTo is a space-separated list of region IDs; every ID that is a
            // known region becomes one of the preferred values.
            aliasFromRegion->preferredValues = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
            UnicodeString currentRegion;
            currentRegion.remove();
            for (int32_t i = 0 ; i < aliasTo.length() ; i++ ) {
                if ( aliasTo.charAt(i) != 0x0020 ) {
                    currentRegion.append(aliasTo.charAt(i));
                }
                if ( aliasTo.charAt(i) == 0x0020 || i+1 == aliasTo.length() ) {
                    Region *target = (Region *)uhash_get(regionIDMap,(void *)&currentRegion);
                    if (target) {
                        UnicodeString *preferredValue = new UnicodeString(target->idStr);
                        aliasFromRegion->preferredValues->addElement((void *)preferredValue,status);
                    }
                    currentRegion.remove();
                }
            }
        }
    }

    // Process the code mappings - This will allow us to assign numeric codes to most of the territories.
    while ( ures_hasNext(codeMappings) ) {
        UResourceBundle *mapping = ures_getNextResource(codeMappings,NULL,&status);
        if ( ures_getType(mapping) == URES_ARRAY && ures_getSize(mapping) == 3) {
            UnicodeString codeMappingID = ures_getUnicodeStringByIndex(mapping,0,&status);
            UnicodeString codeMappingNumber = ures_getUnicodeStringByIndex(mapping,1,&status);
            UnicodeString codeMapping3Letter = ures_getUnicodeStringByIndex(mapping,2,&status);

            Region *r = (Region *)uhash_get(regionIDMap,(void *)&codeMappingID);
            if ( r ) {
                Formattable result;
                UErrorCode ps = U_ZERO_ERROR;
                df->parse(codeMappingNumber,result,ps);
                if ( U_SUCCESS(ps) ) {
                    r->code = result.getLong(); // Convert string to number
                    uhash_iput(numericCodeMap,r->code,(void *)r,&status);
                }
                // The three-letter code is registered as an alias of the region.
                UnicodeString *code3 = new UnicodeString(codeMapping3Letter);
                uhash_put(regionAliases,(void *)code3, (void *)r,&status);
            }
        }
        ures_close(mapping);
    }

    // Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS
    Region *r;
    r = (Region *) uhash_get(regionIDMap,(void *)&WORLD_ID);
    if ( r ) {
        r->type = URGN_WORLD;
    }

    r = (Region *) uhash_get(regionIDMap,(void *)&UNKNOWN_REGION_ID);
    if ( r ) {
        r->type = URGN_UNKNOWN;
    }

    for ( int32_t i = 0 ; i < continents->size() ; i++ ) {
        r = (Region *) uhash_get(regionIDMap,(void *)continents->elementAt(i));
        if ( r ) {
            r->type = URGN_CONTINENT;
        }
    }
    delete continents;

    for ( int32_t i = 0 ; i < groupings->size() ; i++ ) {
        r = (Region *) uhash_get(regionIDMap,(void *)groupings->elementAt(i));
        if ( r ) {
            r->type = URGN_GROUPING;
        }
    }
    delete groupings;

    // Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR
    // even though it looks like a territory code.  Need to handle it here.

    r = (Region *) uhash_get(regionIDMap,(void *)&OUTLYING_OCEANIA_REGION_ID);
    if ( r ) {
        r->type = URGN_SUBCONTINENT;
    }

    // Load territory containment info from the supplemental data.
    while ( ures_hasNext(territoryContainment) ) {
        UResourceBundle *mapping = ures_getNextResource(territoryContainment,NULL,&status);
        const char *parent = ures_getKey(mapping);
        UnicodeString parentStr = UnicodeString(parent);
        Region *parentRegion = (Region *) uhash_get(regionIDMap,(void *)&parentStr);

        for ( int j = 0 ; j < ures_getSize(mapping); j++ ) {
            UnicodeString child = ures_getUnicodeStringByIndex(mapping,j,&status);
            Region *childRegion = (Region *) uhash_get(regionIDMap,(void *)&child);
            if ( parentRegion != NULL && childRegion != NULL ) {

                // Add the child region to the set of regions contained by the parent
                if (parentRegion->containedRegions == NULL) {
                    parentRegion->containedRegions = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
                }

                // Default-construct the string; its contents are replaced by
                // fastCopyFrom() on the next line, so no initial value is needed.
                UnicodeString *childStr = new UnicodeString();
                childStr->fastCopyFrom(childRegion->idStr);
                parentRegion->containedRegions->addElement((void *)childStr,status);

                // Set the parent region to be the containing region of the child.
                // Regions of type GROUPING can't be set as the parent, since another region
                // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent.
                if ( parentRegion->type != URGN_GROUPING) {
                    childRegion->containingRegion = parentRegion;
                }
            }
        }
        ures_close(mapping);
    }

    // Create the availableRegions lists
    int32_t pos = -1;
    while ( const UHashElement* element = uhash_nextElement(regionIDMap,&pos)) {
        Region *ar = (Region *)element->value.pointer;
        // One list per region type, created the first time that type is seen.
        if ( availableRegions[ar->type] == NULL ) {
            availableRegions[ar->type] = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
        }
        UnicodeString *arString = new UnicodeString(ar->idStr);
        availableRegions[ar->type]->addElement((void *)arString,status);
    }

    ures_close(territoryContainment);
    ures_close(worldContainment);
    ures_close(groupingContainment);

    ures_close(codeMappings);
    ures_close(rb2);
    ures_close(territoryAlias);
    ures_close(regionCodes);
    ures_close(rb);

    delete df;

    ucln_i18n_registerCleanup(UCLN_I18N_REGION, region_cleanup);

    regionDataIsLoaded = true;
    umtx_unlock(&gRegionDataLock);

}
コード例 #6
0
ファイル: numfmt.cpp プロジェクト: flwh/Alcatel_OT_985_kernel
/**
 * Creates a NumberFormat of the requested style for the given locale.
 *
 * Steps: (1) on Windows builds, honor "@compat=host" by returning a
 * Win32NumberFormat; (2) load the locale's number pattern and decimal
 * symbols, or the last-resort pattern when no resource data is available;
 * (3) obtain the locale's NumberingSystem, through the shared cache when
 * one is available; (4) build a RuleBasedNumberFormat for algorithmic
 * numbering systems, or a DecimalFormat otherwise.
 *
 * @param desiredLocale  locale to create the formatter for
 * @param style          requested style; must be in [0, kStyleCount)
 * @param status         ICU error code.  Nothing is done if it already
 *                       indicates a failure on entry.
 * @return a new formatter, or NULL on failure (with `status` set).
 */
NumberFormat*
NumberFormat::makeInstance(const Locale& desiredLocale,
                           EStyles style,
                           UErrorCode& status)
{
    if (U_FAILURE(status)) return NULL;

    if (style < 0 || style >= kStyleCount) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

#ifdef U_WINDOWS
    char buffer[8];
    int32_t count = desiredLocale.getKeywordValue("compat", buffer, sizeof(buffer), status);

    // if the locale has "@compat=host", create a host-specific NumberFormat
    if (count > 0 && uprv_strcmp(buffer, "host") == 0) {
        Win32NumberFormat *f = NULL;
        UBool curr = TRUE;

        switch (style) {
        case kNumberStyle:
            curr = FALSE;
            // fall-through

        case kCurrencyStyle:
        case kIsoCurrencyStyle: // do not support plural formatting here
        case kPluralCurrencyStyle:
            f = new Win32NumberFormat(desiredLocale, curr, status);

            if (U_SUCCESS(status)) {
                return f;
            }

            delete f;
            break;

        default:
            break;
        }
    }
#endif

    NumberFormat* f = NULL;
    DecimalFormatSymbols* symbolsToAdopt = NULL;
    UnicodeString pattern;
    UResourceBundle *resource = ures_open((char *)0, desiredLocale.getName(), &status);
    UResourceBundle *numberPatterns = ures_getByKey(resource, DecimalFormat::fgNumberPatterns, NULL, &status);
    NumberingSystem *ns = NULL;
    // TRUE while this function still owns symbolsToAdopt; cleared once a
    // DecimalFormat has adopted it.
    UBool deleteSymbols = TRUE;
    UHashtable * cache = NULL;
    int32_t hashKey;
    UBool getCache = FALSE;
    // TRUE when ns was created outside the cache and must be deleted here.
    UBool deleteNS = FALSE;

    if (U_FAILURE(status)) {
        // We don't appear to have resource data available -- use the last-resort data
        status = U_USING_FALLBACK_WARNING;
        // When the data is unavailable, and locale isn't passed in, last resort data is used.
        symbolsToAdopt = new DecimalFormatSymbols(status);

        // Creates a DecimalFormat instance with the last resort number patterns.
        pattern.setTo(TRUE, gLastResortNumberPatterns[style], -1);
    }
    else {
        // If not all the styled patterns exists for the NumberFormat in this locale,
        // sets the status code to failure and returns nil.
        if (ures_getSize(numberPatterns) < (int32_t)(sizeof(gLastResortNumberPatterns)/sizeof(gLastResortNumberPatterns[0])) -2 ) { //minus 2: ISO and plural
            status = U_INVALID_FORMAT_ERROR;
            goto cleanup;
        }

        // Loads the decimal symbols of the desired locale.
        symbolsToAdopt = new DecimalFormatSymbols(desiredLocale, status);

        int32_t patLen = 0;

        /* for ISOCURRENCYSTYLE and PLURALCURRENCYSTYLE,
         * the pattern is the same as the pattern of CURRENCYSTYLE
         * but by replacing the single currency sign with
         * double currency sign or triple currency sign.
         */
        int styleInNumberPattern = ((style == kIsoCurrencyStyle ||
                                     style == kPluralCurrencyStyle) ?
                                    kCurrencyStyle : style);

        const UChar *patResStr = ures_getStringByIndex(numberPatterns, (int32_t)styleInNumberPattern, &patLen, &status);

        // Creates the specified decimal format style of the desired locale.
        pattern.setTo(TRUE, patResStr, patLen);
    }
    if (U_FAILURE(status) || symbolsToAdopt == NULL) {
        goto cleanup;
    }
    if(style==kCurrencyStyle || style == kIsoCurrencyStyle){
        // A currency pattern supplied by the symbols overrides the one loaded above.
        const UChar* currPattern = symbolsToAdopt->getCurrencyPattern();
        if(currPattern!=NULL){
            pattern.setTo(currPattern, u_strlen(currPattern));
        }
    }

    // Use numbering system cache hashtable
    UMTX_CHECK(&nscacheMutex, (UBool)(cache != NumberingSystem_cache), getCache);
    if (getCache) {
        umtx_lock(&nscacheMutex);
        cache = NumberingSystem_cache;
        umtx_unlock(&nscacheMutex);
    }

    // Check cache we got, create if non-existant
    status = U_ZERO_ERROR;
    if (cache == NULL) {
        cache = uhash_open(uhash_hashLong,
                           uhash_compareLong,
                           NULL,
                           &status);

        if (cache == NULL || U_FAILURE(status)) {
            // cache not created - out of memory
            cache = NULL;
        }
        else {
            // cache created
            uhash_setValueDeleter(cache, deleteNumberingSystem);

            // set final NumberingSystem_cache value
            UHashtable* h = NULL;

            UMTX_CHECK(&nscacheMutex, (UBool)(h != NumberingSystem_cache), getCache);
            if (getCache) {
                umtx_lock(&nscacheMutex);
                h = NumberingSystem_cache;
                umtx_unlock(&nscacheMutex);
            }
            if (h == NULL) {
                // No global cache yet: publish the table created above.
                umtx_lock(&nscacheMutex);
                NumberingSystem_cache = h = cache;
                cache = NULL;
                ucln_i18n_registerCleanup(UCLN_I18N_NUMFMT, numfmt_cleanup);
                umtx_unlock(&nscacheMutex);
            }

            // Still non-NULL only if a global cache already existed; discard ours.
            if(cache != NULL) {
              uhash_close(cache);
            }
            cache = h;
        }
    }

    // Get cached numbering system
    if (cache != NULL) {
        hashKey = desiredLocale.hashCode();

        umtx_lock(&nscacheMutex);
        ns = (NumberingSystem *)uhash_iget(cache, hashKey);
        if (ns == NULL) {
            ns = NumberingSystem::createInstance(desiredLocale,status);
            uhash_iput(cache, hashKey, (void*)ns, &status);
        }
        umtx_unlock(&nscacheMutex);
    }
    else {
        ns = NumberingSystem::createInstance(desiredLocale,status);
        deleteNS = TRUE;
    }

    // check results of getting a numbering system
    if ((ns == NULL) || (U_FAILURE(status))) {
        goto cleanup;
    }

    if (ns->isAlgorithmic()) {
        UnicodeString nsDesc;
        UnicodeString nsRuleSetGroup;
        UnicodeString nsRuleSetName;
        Locale nsLoc;
        URBNFRuleSetTag desiredRulesType = URBNF_NUMBERING_SYSTEM;

        // A description containing two distinct slashes is split into
        // locale / rule set group / rule set name; otherwise the whole
        // description is used as the rule set name.
        nsDesc.setTo(ns->getDescription());
        int32_t firstSlash = nsDesc.indexOf(gSlash);
        int32_t lastSlash = nsDesc.lastIndexOf(gSlash);
        if ( lastSlash > firstSlash ) {
            char nsLocID[ULOC_FULLNAME_CAPACITY];

            nsDesc.extract(0,firstSlash,nsLocID,ULOC_FULLNAME_CAPACITY,US_INV);
            nsRuleSetGroup.setTo(nsDesc,firstSlash+1,lastSlash-firstSlash-1);
            nsRuleSetName.setTo(nsDesc,lastSlash+1);

            nsLoc = Locale::createFromName(nsLocID);

            UnicodeString SpelloutRules = UNICODE_STRING_SIMPLE("SpelloutRules");
            if ( nsRuleSetGroup.compare(SpelloutRules) == 0 ) {
                desiredRulesType = URBNF_SPELLOUT;
            }
        } else {
            nsLoc = desiredLocale;
            nsRuleSetName.setTo(nsDesc);
        }

        RuleBasedNumberFormat *r = new RuleBasedNumberFormat(desiredRulesType,nsLoc,status);

        if (U_FAILURE(status) || r == NULL) {
            // f is still NULL here, so cleanup would not release r.
            delete r;
            goto cleanup;
        }
        r->setDefaultRuleSet(nsRuleSetName,status);
        f = (NumberFormat *) r;

    } else {
        // replace single currency sign in the pattern with double currency sign
        // if the style is kIsoCurrencyStyle
        if (style == kIsoCurrencyStyle) {
            pattern.findAndReplace(gSingleCurrencySign, gDoubleCurrencySign);
        }

        f = new DecimalFormat(pattern, symbolsToAdopt, style, status);
        if (f != NULL) {
            // Once the DecimalFormat exists it deletes the symbols itself,
            // whether or not construction reported an error.
            deleteSymbols = FALSE;
        }
        if (U_FAILURE(status) || f == NULL) {
            goto cleanup;
        }
    }

    f->setLocaleIDs(ures_getLocaleByType(numberPatterns, ULOC_VALID_LOCALE, &status),
                    ures_getLocaleByType(numberPatterns, ULOC_ACTUAL_LOCALE, &status));

cleanup:
    ures_close(numberPatterns);
    ures_close(resource);

    if (deleteNS && ns) {
        delete ns;
    }

    if (U_FAILURE(status)) {
        // Release the formatter, and the symbols too unless a DecimalFormat
        // took them over.  A RuleBasedNumberFormat never adopts the symbols,
        // so they must be deleted here in that case.
        delete f;
        if (deleteSymbols) {
            delete symbolsToAdopt;
        }
        return NULL;
    }
    if (f == NULL || symbolsToAdopt == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        f = NULL;
    }
    if (deleteSymbols && symbolsToAdopt != NULL) {
        delete symbolsToAdopt;
    }
    return f;
}
コード例 #7
0
/**
 * Builds a NumberFormat for a locale and a UNumberFormatStyle.
 *
 * Depending on the locale's numbering system the result is either a
 * RuleBasedNumberFormat (algorithmic numbering system) or a DecimalFormat.
 * On Windows-only builds a locale carrying "@compat=host" yields a
 * Win32NumberFormat instead, unless a DecimalFormat is required.
 *
 * @param desiredLocale       locale whose symbols and patterns are loaded.
 * @param style               requested style; must be in
 *                            [0, UNUM_FORMAT_STYLE_COUNT) and accepted by
 *                            isStyleSupported().
 * @param mustBeDecimalFormat if TRUE, the host (Win32) format is never used and
 *                            an algorithmic numbering system is rejected with
 *                            U_UNSUPPORTED_ERROR.
 * @param status              in/out error code; nothing is done if it already
 *                            indicates a failure.
 * @return a newly allocated formatter (presumably owned by the caller), or NULL
 *         with status set to a failure code.
 */
NumberFormat*
NumberFormat::makeInstance(const Locale& desiredLocale,
                           UNumberFormatStyle style,
                           UBool mustBeDecimalFormat,
                           UErrorCode& status) {
    if (U_FAILURE(status)) return NULL;

    if (style < 0 || style >= UNUM_FORMAT_STYLE_COUNT) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    // Some styles are not supported. This is a result of merging
    // the @draft ICU 4.2 NumberFormat::EStyles into the long-existing UNumberFormatStyle.
    // Ticket #8503 is for reviewing/fixing/merging the two relevant implementations:
    // this one and unum_open().
    // The UNUM_PATTERN_ styles are not supported here
    // because this method does not take a pattern string.
    if (!isStyleSupported(style)) {
        status = U_UNSUPPORTED_ERROR;
        return NULL;
    }

#if U_PLATFORM_USES_ONLY_WIN32_API
    if (!mustBeDecimalFormat) {
        char buffer[8];
        int32_t count = desiredLocale.getKeywordValue("compat", buffer, sizeof(buffer), status);

        // if the locale has "@compat=host", create a host-specific NumberFormat
        if (U_SUCCESS(status) && count > 0 && uprv_strcmp(buffer, "host") == 0) {
            Win32NumberFormat *f = NULL;
            UBool curr = TRUE;

            switch (style) {
            case UNUM_DECIMAL:
                curr = FALSE;
                // fall-through

            case UNUM_CURRENCY:
            case UNUM_CURRENCY_ISO: // do not support plural formatting here
            case UNUM_CURRENCY_PLURAL:
                f = new Win32NumberFormat(desiredLocale, curr, status);

                // An allocation failure must not be reported as a NULL result
                // paired with a success status.
                if (f == NULL) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    return NULL;
                }

                if (U_SUCCESS(status)) {
                    return f;
                }

                delete f;
                break;
            default:
                break;
            }
        }
    }
#endif
    // Use numbering system cache hashtable
    umtx_initOnce(gNSCacheInitOnce, &nscacheInit);

    // Get cached numbering system.
    // When the cache exists, 'ns' is only borrowed from it; otherwise the
    // instance is held by 'ownedNs' and released when this function returns.
    LocalPointer<NumberingSystem> ownedNs;
    NumberingSystem *ns = NULL;
    if (NumberingSystem_cache != NULL) {
        // TODO: Bad hash key usage, see ticket #8504.
        int32_t hashKey = desiredLocale.hashCode();

        Mutex lock(&nscacheMutex);
        ns = (NumberingSystem *)uhash_iget(NumberingSystem_cache, hashKey);
        if (ns == NULL) {
            ns = NumberingSystem::createInstance(desiredLocale,status);
            uhash_iput(NumberingSystem_cache, hashKey, (void*)ns, &status);
        }
    } else {
        ownedNs.adoptInstead(NumberingSystem::createInstance(desiredLocale,status));
        ns = ownedNs.getAlias();
    }

    // check results of getting a numbering system
    if (U_FAILURE(status)) {
        return NULL;
    }

    if (mustBeDecimalFormat && ns->isAlgorithmic()) {
        status = U_UNSUPPORTED_ERROR;
        return NULL;
    }

    LocalPointer<DecimalFormatSymbols> symbolsToAdopt;
    UnicodeString pattern;
    LocalUResourceBundlePointer ownedResource(ures_open(NULL, desiredLocale.getName(), &status));
    if (U_FAILURE(status)) {
        // We don't appear to have resource data available -- use the last-resort data
        status = U_USING_FALLBACK_WARNING;
        // When the data is unavailable, and locale isn't passed in, last resort data is used.
        symbolsToAdopt.adoptInstead(new DecimalFormatSymbols(status));
        if (symbolsToAdopt.isNull()) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }

        // Creates a DecimalFormat instance with the last resort number patterns.
        pattern.setTo(TRUE, gLastResortNumberPatterns[style], -1);
    }
    else {
        // Loads the decimal symbols of the desired locale.
        symbolsToAdopt.adoptInstead(new DecimalFormatSymbols(desiredLocale, status));
        if (symbolsToAdopt.isNull()) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }

        // The bundle is re-used as the fill-in for the nested lookups below, so
        // ownership is taken out of the smart pointer while it is re-targeted
        // and handed back once the "patterns" table has been reached.
        UResourceBundle *resource = ownedResource.orphan();
        UResourceBundle *numElements = ures_getByKeyWithFallback(resource, gNumberElements, NULL, &status);
        resource = ures_getByKeyWithFallback(numElements, ns->getName(), resource, &status);
        resource = ures_getByKeyWithFallback(resource, gPatterns, resource, &status);
        ownedResource.adoptInstead(resource);

        int32_t patLen = 0;
        const UChar *patResStr = ures_getStringByKeyWithFallback(resource, gFormatKeys[style], &patLen, &status);

        // Didn't find a pattern specific to the numbering system, so fall back to "latn"
        if ( status == U_MISSING_RESOURCE_ERROR && uprv_strcmp(gLatn,ns->getName())) {
            status = U_ZERO_ERROR;
            resource = ures_getByKeyWithFallback(numElements, gLatn, resource, &status);
            resource = ures_getByKeyWithFallback(resource, gPatterns, resource, &status);
            patResStr = ures_getStringByKeyWithFallback(resource, gFormatKeys[style], &patLen, &status);
        }

        ures_close(numElements);

        // Creates the specified decimal format style of the desired locale.
        pattern.setTo(TRUE, patResStr, patLen);
    }
    if (U_FAILURE(status)) {
        return NULL;
    }
    // A currency pattern supplied by the symbols overrides the resource pattern.
    if(style==UNUM_CURRENCY || style == UNUM_CURRENCY_ISO){
        const UChar* currPattern = symbolsToAdopt->getCurrencyPattern();
        if(currPattern!=NULL){
            pattern.setTo(currPattern, u_strlen(currPattern));
        }
    }


    NumberFormat *f;
    if (ns->isAlgorithmic()) {
        UnicodeString nsDesc;
        UnicodeString nsRuleSetGroup;
        UnicodeString nsRuleSetName;
        Locale nsLoc;
        URBNFRuleSetTag desiredRulesType = URBNF_NUMBERING_SYSTEM;

        // A description containing two distinct slashes is split into
        // "<locale>/<rule set group>/<rule set name>"; anything else is taken
        // as a bare rule set name for the desired locale.
        nsDesc.setTo(ns->getDescription());
        int32_t firstSlash = nsDesc.indexOf(gSlash);
        int32_t lastSlash = nsDesc.lastIndexOf(gSlash);
        if ( lastSlash > firstSlash ) {
            CharString nsLocID;

            nsLocID.appendInvariantChars(nsDesc.tempSubString(0, firstSlash), status);
            nsRuleSetGroup.setTo(nsDesc,firstSlash+1,lastSlash-firstSlash-1);
            nsRuleSetName.setTo(nsDesc,lastSlash+1);

            nsLoc = Locale::createFromName(nsLocID.data());

            UnicodeString SpelloutRules = UNICODE_STRING_SIMPLE("SpelloutRules");
            if ( nsRuleSetGroup.compare(SpelloutRules) == 0 ) {
                desiredRulesType = URBNF_SPELLOUT;
            }
        } else {
            nsLoc = desiredLocale;
            nsRuleSetName.setTo(nsDesc);
        }

        RuleBasedNumberFormat *r = new RuleBasedNumberFormat(desiredRulesType,nsLoc,status);
        if (r == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        r->setDefaultRuleSet(nsRuleSetName,status);
        f = r;
    } else {
        // replace single currency sign in the pattern with double currency sign
        // if the style is UNUM_CURRENCY_ISO
        if (style == UNUM_CURRENCY_ISO) {
            pattern.findAndReplace(UnicodeString(TRUE, gSingleCurrencySign, 1),
                                   UnicodeString(TRUE, gDoubleCurrencySign, 2));
        }

        // "new DecimalFormat()" does not adopt the symbols if its memory allocation fails.
        DecimalFormatSymbols *syms = symbolsToAdopt.orphan();
        f = new DecimalFormat(pattern, syms, style, status);
        if (f == NULL) {
            delete syms;
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
    }

    // The last-resort path above has no resource bundle. Asking a NULL bundle
    // for its locale IDs reports U_ILLEGAL_ARGUMENT_ERROR, which would throw
    // away the formatter that was just built from the last-resort data, so the
    // locale IDs are only recorded when a bundle was actually opened.
    if (ownedResource.getAlias() != NULL) {
        f->setLocaleIDs(ures_getLocaleByType(ownedResource.getAlias(), ULOC_VALID_LOCALE, &status),
                        ures_getLocaleByType(ownedResource.getAlias(), ULOC_ACTUAL_LOCALE, &status));
    }
    if (U_FAILURE(status)) {
        delete f;
        return NULL;
    }
    return f;
}
コード例 #8
0
ファイル: chashtst.c プロジェクト: flwh/Alcatel_OT_985_kernel
/*
 * Exercises the less common UHashtable entry points on a single table:
 *   - UChar-string keys with integer values (puti / geti / find / removei),
 *   - uhash_put() with a NULL value, which is expected to remove the key,
 *   - puts issued while the status already holds an error (expected no-op),
 *   - uhash_removeAll(), then switching hasher/comparator to integer keys
 *     (iputi / iremovei, iput / iremove) and changing the resize policy.
 * Every unexpected result is reported through log_err(); the test keeps going
 * after a mismatch and only returns early if the table cannot be opened.
 */
static void TestOtherAPI(void){
    
    UErrorCode status = U_ZERO_ERROR;
    UHashtable *hash;

    /* Use the correct type when cast to void * */
    /* The "...2" arrays hold the same text at a different address, so lookups
       must succeed by content rather than by pointer identity. */
    static const UChar one[4]   = {0x006F, 0x006E, 0x0065, 0}; /* L"one" */
    static const UChar one2[4]  = {0x006F, 0x006E, 0x0065, 0}; /* Get around compiler optimizations */
    static const UChar two[4]   = {0x0074, 0x0077, 0x006F, 0}; /* L"two" */
    static const UChar two2[4]  = {0x0074, 0x0077, 0x006F, 0}; /* L"two" */
    static const UChar three[6] = {0x0074, 0x0068, 0x0072, 0x0065, 0x0065, 0}; /* L"three" */
    static const UChar four[6]  = {0x0066, 0x006F, 0x0075, 0x0072, 0}; /* L"four" */
    static const UChar five[6]  = {0x0066, 0x0069, 0x0076, 0x0065, 0}; /* L"five" */
    static const UChar five2[6] = {0x0066, 0x0069, 0x0076, 0x0065, 0}; /* L"five" */

    hash = uhash_open(uhash_hashUChars, uhash_compareUChars, NULL,  &status);
    if (U_FAILURE(status)) {
        /* %p with a void* argument: a pointer must not be passed to %x. */
        log_err("FAIL: uhash_open failed with %s and returned %p\n",
                u_errorName(status), (void*)hash);
        return;
    }
    if (hash == NULL) {
        log_err("FAIL: uhash_open returned NULL\n");
        return;
    }
    log_verbose("Ok: uhash_open returned %p\n", (void*)hash);

    uhash_puti(hash, (void*)one, 1, &status);
    if(uhash_count(hash) != 1){
         log_err("FAIL: uhash_count() failed. Expected: 1, Got: %d\n", uhash_count(hash));
    }
    if(uhash_find(hash, (void*)two) != NULL){
        log_err("FAIL: uhash_find failed\n");
    }
    uhash_puti(hash, (void*)two, 2, &status);
    uhash_puti(hash, (void*)three, 3, &status);
    uhash_puti(hash, (void*)four, 4, &status);
    uhash_puti(hash, (void*)five, 5, &status);
    
    if(uhash_count(hash) != 5){
        log_err("FAIL: uhash_count() failed. Expected: 5, Got: %d\n", uhash_count(hash));
    }
    
    if(uhash_geti(hash, (void*)two2) != 2){
        log_err("FAIL: uhash_geti failed\n");
    }
    
    if(uhash_find(hash, (void*)two2) == NULL){
        log_err("FAIL: uhash_find of \"two\" failed\n");
    }
    
    if(uhash_removei(hash, (void*)five2) != 5){
        log_err("FAIL: uhash_remove() failed\n");
    }
    if(uhash_count(hash) != 4){
        log_err("FAIL: uhash_count() failed. Expected: 4, Got: %d\n", uhash_count(hash));
    }

    /* Putting a NULL value is expected to delete the existing entry. */
    uhash_put(hash, (void*)one, NULL, &status);
    if(uhash_count(hash) != 3){
        log_err("FAIL: uhash_put() with value=NULL didn't remove the key value pair\n");
    }
    /* With an error already in status the put must leave the table untouched. */
    status=U_ILLEGAL_ARGUMENT_ERROR;
    uhash_puti(hash, (void*)one, 1, &status);
    if(uhash_count(hash) != 3){
        log_err("FAIL: uhash_put() with value!=NULL should fail when status != U_ZERO_ERROR \n");
    }
    
    status=U_ZERO_ERROR;
    uhash_puti(hash, (void*)one, 1, &status);
    if(uhash_count(hash) != 4){
        log_err("FAIL: uhash_put() with value!=NULL didn't replace the key value pair\n");
    }

    if(_compareUChars((void*)one, (void*)two) == TRUE ||
        _compareUChars((void*)one, (void*)one) != TRUE ||
        _compareUChars((void*)one, (void*)one2) != TRUE ||
        _compareUChars((void*)one, NULL) == TRUE  )  {
        log_err("FAIL: compareUChars failed\n");
    }
   
    uhash_removeAll(hash);
    if(uhash_count(hash) != 0){
        log_err("FAIL: uhash_count() failed. Expected: 0, Got: %d\n", uhash_count(hash));
    }

    /* Re-purpose the now empty table for integer keys. */
    uhash_setKeyComparator(hash, uhash_compareLong);
    uhash_setKeyHasher(hash, uhash_hashLong);
    uhash_iputi(hash, 1001, 1, &status);
    uhash_iputi(hash, 1002, 2, &status);
    uhash_iputi(hash, 1003, 3, &status);
    if(_compareLong(1001, 1002) == TRUE ||
        _compareLong(1001, 1001) != TRUE ||
        _compareLong(1001, 0) == TRUE  )  {
        log_err("FAIL: compareLong failed\n");
    }
    /*set the resize policy to just GROW and SHRINK*/
         /*how to test this??*/
    uhash_setResizePolicy(hash, U_GROW_AND_SHRINK);
    uhash_iputi(hash, 1004, 4, &status);
    uhash_iputi(hash, 1005, 5, &status);
    uhash_iputi(hash, 1006, 6, &status);
    if(uhash_count(hash) != 6){
        log_err("FAIL: uhash_count() failed. Expected: 6, Got: %d\n", uhash_count(hash));
    }
    if(uhash_iremovei(hash, 1004) != 4){
        log_err("FAIL: uhash_remove failed\n");
    }
    /* Removing the same key a second time must report "nothing removed". */
    if(uhash_iremovei(hash, 1004) != 0){
        log_err("FAIL: uhash_remove failed\n");
    }

    uhash_removeAll(hash);
    uhash_iput(hash, 2004, (void*)one, &status);
    uhash_iput(hash, 2005, (void*)two, &status);
    if(uhash_count(hash) != 2){
        log_err("FAIL: uhash_count() failed. Expected: 2, Got: %d\n", uhash_count(hash));
    }
    if(uhash_iremove(hash, 2004) != (void*)one){
        log_err("FAIL: uhash_remove failed\n");
    }
    if(uhash_iremove(hash, 2004) != NULL){
        log_err("FAIL: uhash_remove failed\n");
    }
    if(uhash_count(hash) != 1){
        log_err("FAIL: uhash_count() failed. Expected: 1, Got: %d\n", uhash_count(hash));
    }

    uhash_close(hash);

}
コード例 #9
0
ファイル: uspoof_conf.cpp プロジェクト: DavidCai1993/node
void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesLen,
               UErrorCode &status) {

    // Convert the user input data from UTF-8 to UChar (UTF-16)
    int32_t inputLen = 0;
    if (U_FAILURE(status)) {
        return;
    }
    u_strFromUTF8(NULL, 0, &inputLen, confusables, confusablesLen, &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        return;
    }
    status = U_ZERO_ERROR;
    fInput = static_cast<UChar *>(uprv_malloc((inputLen+1) * sizeof(UChar)));
    if (fInput == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    u_strFromUTF8(fInput, inputLen+1, NULL, confusables, confusablesLen, &status);


    // Regular Expression to parse a line from Confusables.txt.  The expression will match
    // any line.  What was matched is determined by examining which capture groups have a match.
    //   Capture Group 1:  the source char
    //   Capture Group 2:  the replacement chars
    //   Capture Group 3-6  the table type, SL, SA, ML, or MA (deprecated)
    //   Capture Group 7:  A blank or comment only line.
    //   Capture Group 8:  A syntactically invalid line.  Anything that didn't match before.
    // Example Line from the confusables.txt source file:
    //   "1D702 ;	006E 0329 ;	SL	# MATHEMATICAL ITALIC SMALL ETA ... "
    UnicodeString pattern(
        "(?m)^[ \\t]*([0-9A-Fa-f]+)[ \\t]+;"      // Match the source char
        "[ \\t]*([0-9A-Fa-f]+"                    // Match the replacement char(s)
           "(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;"    //     (continued)
        "\\s*(?:(SL)|(SA)|(ML)|(MA))"             // Match the table type
        "[ \\t]*(?:#.*?)?$"                       // Match any trailing #comment
        "|^([ \\t]*(?:#.*?)?)$"       // OR match empty lines or lines with only a #comment
        "|^(.*?)$", -1, US_INV);      // OR match any line, which catches illegal lines.
    // TODO: Why are we using the regex C API here? C++ would just take UnicodeString...
    fParseLine = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status);

    // Regular expression for parsing a hex number out of a space-separated list of them.
    //   Capture group 1 gets the number, with spaces removed.
    pattern = UNICODE_STRING_SIMPLE("\\s*([0-9A-F]+)");
    fParseHexNum = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status);

    // Zap any Byte Order Mark at the start of input.  Changing it to a space is benign
    //   given the syntax of the input.
    if (*fInput == 0xfeff) {
        *fInput = 0x20;
    }

    // Parse the input, one line per iteration of this loop.
    uregex_setText(fParseLine, fInput, inputLen, &status);
    while (uregex_findNext(fParseLine, &status)) {
        fLineNum++;
        if (uregex_start(fParseLine, 7, &status) >= 0) {
            // this was a blank or comment line.
            continue;
        }
        if (uregex_start(fParseLine, 8, &status) >= 0) {
            // input file syntax error.
            status = U_PARSE_ERROR;
            return;
        }

        // We have a good input line.  Extract the key character and mapping string, and
        //    put them into the appropriate mapping table.
        UChar32 keyChar = SpoofImpl::ScanHex(fInput, uregex_start(fParseLine, 1, &status),
                          uregex_end(fParseLine, 1, &status), status);

        int32_t mapStringStart = uregex_start(fParseLine, 2, &status);
        int32_t mapStringLength = uregex_end(fParseLine, 2, &status) - mapStringStart;
        uregex_setText(fParseHexNum, &fInput[mapStringStart], mapStringLength, &status);

        UnicodeString  *mapString = new UnicodeString();
        if (mapString == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        while (uregex_findNext(fParseHexNum, &status)) {
            UChar32 c = SpoofImpl::ScanHex(&fInput[mapStringStart], uregex_start(fParseHexNum, 1, &status),
                                 uregex_end(fParseHexNum, 1, &status), status);
            mapString->append(c);
        }
        U_ASSERT(mapString->length() >= 1);

        // Put the map (value) string into the string pool
        // This a little like a Java intern() - any duplicates will be eliminated.
        SPUString *smapString = stringPool->addString(mapString, status);

        // Add the UChar32 -> string mapping to the table.
        // For Unicode 8, the SL, SA and ML tables have been discontinued.
        //                All input data from confusables.txt is tagged MA.
        uhash_iput(fTable, keyChar, smapString, &status);
        if (U_FAILURE(status)) { return; }
        fKeySet->add(keyChar);
    }

    // Input data is now all parsed and collected.
    // Now create the run-time binary form of the data.
    //
    // This is done in two steps.  First the data is assembled into vectors and strings,
    //   for ease of construction, then the contents of these collections are dumped
    //   into the actual raw-bytes data storage.

    // Build up the string array, and record the index of each string therein
    //  in the (build time only) string pool.
    // Strings of length one are not entered into the strings array.
    // (Strings in the table are sorted by length)
    stringPool->sort(status);
    fStringTable = new UnicodeString();
    int32_t poolSize = stringPool->size();
    int32_t i;
    for (i=0; i<poolSize; i++) {
        SPUString *s = stringPool->getByIndex(i);
        int32_t strLen = s->fStr->length();
        int32_t strIndex = fStringTable->length();
        if (strLen == 1) {
            // strings of length one do not get an entry in the string table.
            // Keep the single string character itself here, which is the same
            //  convention that is used in the final run-time string table index.
            s->fCharOrStrTableIndex = s->fStr->charAt(0);
        } else {
            s->fCharOrStrTableIndex = strIndex;
            fStringTable->append(*(s->fStr));
        }
    }

    // Construct the compile-time Key and Value tables
    //
    // For each key code point, check which mapping tables it applies to,
    //   and create the final data for the key & value structures.
    //
    //   The four logical mapping tables are conflated into one combined table.
    //   If multiple logical tables have the same mapping for some key, they
    //     share a single entry in the combined table.
    //   If more than one mapping exists for the same key code point, multiple
    //     entries will be created in the table

    for (int32_t range=0; range<fKeySet->getRangeCount(); range++) {
        // It is an oddity of the UnicodeSet API that simply enumerating the contained
        //   code points requires a nested loop.
        for (UChar32 keyChar=fKeySet->getRangeStart(range);
                keyChar <= fKeySet->getRangeEnd(range); keyChar++) {
            SPUString *targetMapping = static_cast<SPUString *>(uhash_iget(fTable, keyChar));
            U_ASSERT(targetMapping != NULL);

            // Set an error code if trying to consume a long string.  Otherwise,
            // codePointAndLengthToKey will abort on a U_ASSERT.
            if (targetMapping->fStr->length() > 256) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
                return;
            }

            int32_t key = ConfusableDataUtils::codePointAndLengthToKey(keyChar,
                targetMapping->fStr->length());
            int32_t value = targetMapping->fCharOrStrTableIndex;

            fKeyVec->addElement(key, status);
            fValueVec->addElement(value, status);
        }
    }

    // Put the assembled data into the flat runtime array
    outputData(status);

    // All of the intermediate allocated data belongs to the ConfusabledataBuilder
    //  object  (this), and is deleted in the destructor.
    return;
}
コード例 #10
0
ファイル: region.cpp プロジェクト: Cyril2004/proto-quic
/*
 * Initializes the region data from the ICU resource bundles.  The region data
 * contains the basic relationships such as which regions are known, what the numeric
 * codes are, any known aliases, and the territory containment data.
 *
 * If the region data has already loaded, then this method simply returns without doing
 * anything meaningful.
 */
void Region::loadRegionData(UErrorCode &status) {

    // Construct service objs first
    LocalUHashtablePointer newRegionIDMap(uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status));
    LocalUHashtablePointer newNumericCodeMap(uhash_open(uhash_hashLong,uhash_compareLong,NULL,&status));
    LocalUHashtablePointer newRegionAliases(uhash_open(uhash_hashUnicodeString,uhash_compareUnicodeString,NULL,&status));
    LocalPointer<DecimalFormat> df(new DecimalFormat(status), status);

    LocalPointer<UVector> continents(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);
    LocalPointer<UVector> groupings(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);
    allRegions = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);

    LocalUResourceBundlePointer metadata(ures_openDirect(NULL,"metadata",&status));
    LocalUResourceBundlePointer metadataAlias(ures_getByKey(metadata.getAlias(),"alias",NULL,&status));
    LocalUResourceBundlePointer territoryAlias(ures_getByKey(metadataAlias.getAlias(),"territory",NULL,&status));

    LocalUResourceBundlePointer supplementalData(ures_openDirect(NULL,"supplementalData",&status));
    LocalUResourceBundlePointer codeMappings(ures_getByKey(supplementalData.getAlias(),"codeMappings",NULL,&status));

    LocalUResourceBundlePointer idValidity(ures_getByKey(supplementalData.getAlias(),"idValidity",NULL,&status));
    LocalUResourceBundlePointer regionList(ures_getByKey(idValidity.getAlias(),"region",NULL,&status));
    LocalUResourceBundlePointer regionRegular(ures_getByKey(regionList.getAlias(),"regular",NULL,&status));
    LocalUResourceBundlePointer regionMacro(ures_getByKey(regionList.getAlias(),"macroregion",NULL,&status));
    LocalUResourceBundlePointer regionUnknown(ures_getByKey(regionList.getAlias(),"unknown",NULL,&status));

    LocalUResourceBundlePointer territoryContainment(ures_getByKey(supplementalData.getAlias(),"territoryContainment",NULL,&status));
    LocalUResourceBundlePointer worldContainment(ures_getByKey(territoryContainment.getAlias(),"001",NULL,&status));
    LocalUResourceBundlePointer groupingContainment(ures_getByKey(territoryContainment.getAlias(),"grouping",NULL,&status));

    if (U_FAILURE(status)) {
        return;
    }

    // now, initialize
    df->setParseIntegerOnly(TRUE);
    uhash_setValueDeleter(newRegionIDMap.getAlias(), deleteRegion);  // regionIDMap owns objs
    uhash_setKeyDeleter(newRegionAliases.getAlias(), uprv_deleteUObject); // regionAliases owns the string keys


    while ( ures_hasNext(regionRegular.getAlias()) ) {
        UnicodeString regionName = ures_getNextUnicodeString(regionRegular.getAlias(),NULL,&status);
        int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER);
        UChar buf[6];
        regionName.extract(buf,6,status);
        if ( rangeMarkerLocation > 0 ) {
            UChar endRange = regionName.charAt(rangeMarkerLocation+1);
            buf[rangeMarkerLocation] = 0;
            while ( buf[rangeMarkerLocation-1] <= endRange ) {
                LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status);
                allRegions->addElement(newRegion.orphan(),status);
                buf[rangeMarkerLocation-1]++;
            }
        } else {
            LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status);
            allRegions->addElement(newRegion.orphan(),status);
        }
    }

    while ( ures_hasNext(regionMacro.getAlias()) ) {
        UnicodeString regionName = ures_getNextUnicodeString(regionMacro.getAlias(),NULL,&status);
        int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER);
        UChar buf[6];
        regionName.extract(buf,6,status);
        if ( rangeMarkerLocation > 0 ) {
            UChar endRange = regionName.charAt(rangeMarkerLocation+1);
            buf[rangeMarkerLocation] = 0;
            while ( buf[rangeMarkerLocation-1] <= endRange ) {
                LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status);
                allRegions->addElement(newRegion.orphan(),status);
                buf[rangeMarkerLocation-1]++;
            }
        } else {
            LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status);
            allRegions->addElement(newRegion.orphan(),status);
        }
    }

    while ( ures_hasNext(regionUnknown.getAlias()) ) {
        LocalPointer<UnicodeString> regionName (new UnicodeString(ures_getNextUnicodeString(regionUnknown.getAlias(),NULL,&status),status));
        allRegions->addElement(regionName.orphan(),status);
    }

    while ( ures_hasNext(worldContainment.getAlias()) ) {
        UnicodeString *continentName = new UnicodeString(ures_getNextUnicodeString(worldContainment.getAlias(),NULL,&status));
        continents->addElement(continentName,status);
    }

    while ( ures_hasNext(groupingContainment.getAlias()) ) {
        UnicodeString *groupingName = new UnicodeString(ures_getNextUnicodeString(groupingContainment.getAlias(),NULL,&status));
        groupings->addElement(groupingName,status);
    }

    for ( int32_t i = 0 ; i < allRegions->size() ; i++ ) {
        LocalPointer<Region> r(new Region(), status);
        if ( U_FAILURE(status) ) {
           return;
        }
        UnicodeString *regionName = (UnicodeString *)allRegions->elementAt(i);
        r->idStr = *regionName;

        r->idStr.extract(0,r->idStr.length(),r->id,sizeof(r->id),US_INV);
        r->type = URGN_TERRITORY; // Only temporary - figure out the real type later once the aliases are known.

        Formattable result;
        UErrorCode ps = U_ZERO_ERROR;
        df->parse(r->idStr,result,ps);
        if ( U_SUCCESS(ps) ) {
            r->code = result.getLong(); // Convert string to number
            uhash_iput(newNumericCodeMap.getAlias(),r->code,(void *)(r.getAlias()),&status);
            r->type = URGN_SUBCONTINENT;
        } else {
            r->code = -1;
        }
        void* idStrAlias = (void*)&(r->idStr); // about to orphan 'r'. Save this off.
        uhash_put(newRegionIDMap.getAlias(),idStrAlias,(void *)(r.orphan()),&status); // regionIDMap takes ownership
    }

    // Process the territory aliases
    while ( ures_hasNext(territoryAlias.getAlias()) ) {
        LocalUResourceBundlePointer res(ures_getNextResource(territoryAlias.getAlias(),NULL,&status));
        const char *aliasFrom = ures_getKey(res.getAlias());
        LocalPointer<UnicodeString> aliasFromStr(new UnicodeString(aliasFrom, -1, US_INV), status);
        UnicodeString aliasTo = ures_getUnicodeStringByKey(res.getAlias(),"replacement",&status);
        res.adoptInstead(NULL);

        const Region *aliasToRegion = (Region *) uhash_get(newRegionIDMap.getAlias(),&aliasTo);
        Region *aliasFromRegion = (Region *)uhash_get(newRegionIDMap.getAlias(),aliasFromStr.getAlias());

        if ( aliasToRegion != NULL && aliasFromRegion == NULL ) { // This is just an alias from some string to a region
            uhash_put(newRegionAliases.getAlias(),(void *)aliasFromStr.orphan(), (void *)aliasToRegion,&status);
        } else {
            if ( aliasFromRegion == NULL ) { // Deprecated region code not in the master codes list - so need to create a deprecated region for it.
                LocalPointer<Region> newRgn(new Region, status); 
                if ( U_SUCCESS(status) ) {
                    aliasFromRegion = newRgn.orphan();
                } else {
                    return; // error out
                }
                aliasFromRegion->idStr.setTo(*aliasFromStr);
                aliasFromRegion->idStr.extract(0,aliasFromRegion->idStr.length(),aliasFromRegion->id,sizeof(aliasFromRegion->id),US_INV);
                uhash_put(newRegionIDMap.getAlias(),(void *)&(aliasFromRegion->idStr),(void *)aliasFromRegion,&status);
                Formattable result;
                UErrorCode ps = U_ZERO_ERROR;
                df->parse(aliasFromRegion->idStr,result,ps);
                if ( U_SUCCESS(ps) ) {
                    aliasFromRegion->code = result.getLong(); // Convert string to number
                    uhash_iput(newNumericCodeMap.getAlias(),aliasFromRegion->code,(void *)aliasFromRegion,&status);
                } else {
                    aliasFromRegion->code = -1;
                }
                aliasFromRegion->type = URGN_DEPRECATED;
            } else {
                aliasFromRegion->type = URGN_DEPRECATED;
            }

            {
                LocalPointer<UVector> newPreferredValues(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);
                aliasFromRegion->preferredValues = newPreferredValues.orphan();
            }
            if( U_FAILURE(status)) {
                return;
            }
            UnicodeString currentRegion;
            //currentRegion.remove();   TODO: was already 0 length?
            for (int32_t i = 0 ; i < aliasTo.length() ; i++ ) {
                if ( aliasTo.charAt(i) != 0x0020 ) {
                    currentRegion.append(aliasTo.charAt(i));
                }
                if ( aliasTo.charAt(i) == 0x0020 || i+1 == aliasTo.length() ) {
                    Region *target = (Region *)uhash_get(newRegionIDMap.getAlias(),(void *)&currentRegion);
                    if (target) {
                        LocalPointer<UnicodeString> preferredValue(new UnicodeString(target->idStr), status);
                        aliasFromRegion->preferredValues->addElement((void *)preferredValue.orphan(),status);  // may add null if err
                    }
                    currentRegion.remove();
                }
            }
        }
    }

    // Process the code mappings - This will allow us to assign numeric codes to most of the territories.
    while ( ures_hasNext(codeMappings.getAlias()) ) {
        UResourceBundle *mapping = ures_getNextResource(codeMappings.getAlias(),NULL,&status);
        if ( ures_getType(mapping) == URES_ARRAY && ures_getSize(mapping) == 3) {
            UnicodeString codeMappingID = ures_getUnicodeStringByIndex(mapping,0,&status);
            UnicodeString codeMappingNumber = ures_getUnicodeStringByIndex(mapping,1,&status);
            UnicodeString codeMapping3Letter = ures_getUnicodeStringByIndex(mapping,2,&status);

            Region *r = (Region *)uhash_get(newRegionIDMap.getAlias(),(void *)&codeMappingID);
            if ( r ) {
                Formattable result;
                UErrorCode ps = U_ZERO_ERROR;
                df->parse(codeMappingNumber,result,ps);
                if ( U_SUCCESS(ps) ) {
                    r->code = result.getLong(); // Convert string to number
                    uhash_iput(newNumericCodeMap.getAlias(),r->code,(void *)r,&status);
                }
                LocalPointer<UnicodeString> code3(new UnicodeString(codeMapping3Letter), status);
                uhash_put(newRegionAliases.getAlias(),(void *)code3.orphan(), (void *)r,&status);
            }
        }
        ures_close(mapping);
    }

    // Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS
    Region *r;
    UnicodeString WORLD_ID_STRING(WORLD_ID);
    r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&WORLD_ID_STRING);
    if ( r ) {
        r->type = URGN_WORLD;
    }

    UnicodeString UNKNOWN_REGION_ID_STRING(UNKNOWN_REGION_ID);
    r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&UNKNOWN_REGION_ID_STRING);
    if ( r ) {
        r->type = URGN_UNKNOWN;
    }

    for ( int32_t i = 0 ; i < continents->size() ; i++ ) {
        r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)continents->elementAt(i));
        if ( r ) {
            r->type = URGN_CONTINENT;
        }
    }

    for ( int32_t i = 0 ; i < groupings->size() ; i++ ) {
        r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)groupings->elementAt(i));
        if ( r ) {
            r->type = URGN_GROUPING;
        }
    }

    // Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR
    // even though it looks like a territory code.  Need to handle it here.

    UnicodeString OUTLYING_OCEANIA_REGION_ID_STRING(OUTLYING_OCEANIA_REGION_ID);
    r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&OUTLYING_OCEANIA_REGION_ID_STRING);
    if ( r ) {
        r->type = URGN_SUBCONTINENT;
    }

    // Load territory containment info from the supplemental data.
    while ( ures_hasNext(territoryContainment.getAlias()) ) {
        LocalUResourceBundlePointer mapping(ures_getNextResource(territoryContainment.getAlias(),NULL,&status));
        if( U_FAILURE(status) ) {
            return;  // error out
        }
        const char *parent = ures_getKey(mapping.getAlias());
        if (uprv_strcmp(parent, "containedGroupings") == 0 || uprv_strcmp(parent, "deprecated") == 0) {
            continue; // handle new pseudo-parent types added in ICU data per cldrbug 7808; for now just skip.
            // #11232 is to do something useful with these.
        }
        UnicodeString parentStr = UnicodeString(parent, -1 , US_INV);
        Region *parentRegion = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&parentStr);

        for ( int j = 0 ; j < ures_getSize(mapping.getAlias()); j++ ) {
            UnicodeString child = ures_getUnicodeStringByIndex(mapping.getAlias(),j,&status);
            Region *childRegion = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&child);
            if ( parentRegion != NULL && childRegion != NULL ) {

                // Add the child region to the set of regions contained by the parent
                if (parentRegion->containedRegions == NULL) {
                    parentRegion->containedRegions = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
                }

                LocalPointer<UnicodeString> childStr(new UnicodeString(), status);
                if( U_FAILURE(status) ) {
                    return;  // error out
                }
                childStr->fastCopyFrom(childRegion->idStr);
                parentRegion->containedRegions->addElement((void *)childStr.orphan(),status);

                // Set the parent region to be the containing region of the child.
                // Regions of type GROUPING can't be set as the parent, since another region
                // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent.
                if ( parentRegion->type != URGN_GROUPING) {
                    childRegion->containingRegion = parentRegion;
                }
            }
        }
    }

    // Create the availableRegions lists
    int32_t pos = UHASH_FIRST;
    while ( const UHashElement* element = uhash_nextElement(newRegionIDMap.getAlias(),&pos)) {
        Region *ar = (Region *)element->value.pointer;
        if ( availableRegions[ar->type] == NULL ) {
            LocalPointer<UVector> newAr(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);
            availableRegions[ar->type] = newAr.orphan();
        }
        LocalPointer<UnicodeString> arString(new UnicodeString(ar->idStr), status);
        if( U_FAILURE(status) ) {
            return;  // error out
        }
        availableRegions[ar->type]->addElement((void *)arString.orphan(),status);
    }
    
    ucln_i18n_registerCleanup(UCLN_I18N_REGION, region_cleanup);
    // copy hashtables
    numericCodeMap = newNumericCodeMap.orphan();
    regionIDMap = newRegionIDMap.orphan();
    regionAliases = newRegionAliases.orphan();
}
コード例 #11
0
extern void
storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
             UStringPrepType type, UErrorCode* status){
    
 
    UChar* map = NULL;
    int16_t adjustedLen=0, i;
    uint16_t trieWord = 0;
    ValueStruct *value = NULL;
    uint32_t savedTrieWord = 0;

    /* initialize the hashtable */
    if(hashTable==NULL){
        hashTable = uhash_open(hashEntry, compareEntries, NULL, status);
        uhash_setValueDeleter(hashTable, valueDeleter);
    }
    
    /* figure out if the code point has type already stored */
    savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
    if(savedTrieWord!=0){
        if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
            /* turn on the first bit in trie word */
            trieWord += 0x01;
        }else{
            /* 
             * the codepoint has value something other than prohibited
             * and a mapping .. error! 
             */
            fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
            exit(U_ILLEGAL_ARGUMENT_ERROR); 
        } 
    }

    /* figure out the real length */ 
    for(i=0; i<length; i++){
        if(mapping[i] > 0xFFFF){
            adjustedLen +=2;
        }else{
            adjustedLen++;
        }      
    }

    if(adjustedLen == 0){
        trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2);
        /* make sure that the value of trieWord is less than the threshold */
        if(trieWord < _SPREP_TYPE_THRESHOLD){   
            /* now set the value in the trie */
            if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                fprintf(stderr,"Could not set the value for code point.\n");
                exit(U_ILLEGAL_ARGUMENT_ERROR);   
            }
            /* value is set so just return */
            return;
        }else{
            fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
            exit(U_ILLEGAL_CHAR_FOUND);
        }
    }

    if(adjustedLen == 1){
        /* calculate the delta */
        int16_t delta = (int16_t)((int32_t)codepoint - (int16_t) mapping[0]);
        if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){

            trieWord = delta << 2;


            /* make sure that the second bit is OFF */
            if((trieWord & 0x02) != 0 ){
                fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n");
                exit(U_INTERNAL_PROGRAM_ERROR);
            }
            /* make sure that the value of trieWord is less than the threshold */
            if(trieWord < _SPREP_TYPE_THRESHOLD){   
                /* now set the value in the trie */
                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                    fprintf(stderr,"Could not set the value for code point.\n");
                    exit(U_ILLEGAL_ARGUMENT_ERROR);   
                }
                /* value is set so just return */
                return;
            }
        }
        /* 
         * if the delta is not in the given range or if the trieWord is larger than the threshold
         * just fall through for storing the mapping in the mapping table
         */
    }

    map = (UChar*) uprv_calloc(adjustedLen + 1, U_SIZEOF_UCHAR);
    i=0;
    
    while(i<length){
        if(mapping[i] <= 0xFFFF){
            map[i] = (uint16_t)mapping[i];
        }else{
            map[i]   = U16_LEAD(mapping[i]);
            map[i+1] = U16_TRAIL(mapping[i]);
        }
        i++;
    }
    
    value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct));
    value->mapping = map;
    value->type   = type;
    value->length  = adjustedLen;
    if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){
        mappingDataCapacity++;
    }
    if(maxLength < value->length){
        maxLength = value->length;
    }
    uhash_iput(hashTable,codepoint,value,status);
    mappingDataCapacity += adjustedLen;

    if(U_FAILURE(*status)){
        fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status));
        exit(*status);
    }
}