U_NAMESPACE_USE /* functions available in the common library (for unistr_case.cpp) */ /* public API functions */ U_CAPI int32_t U_EXPORT2 u_strToTitle(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UBreakIterator *titleIter, const char *locale, UErrorCode *pErrorCode) { LocalPointer<BreakIterator> ownedIter; BreakIterator *iter; if(titleIter!=NULL) { iter=reinterpret_cast<BreakIterator *>(titleIter); } else { iter=BreakIterator::createWordInstance(Locale(locale), *pErrorCode); ownedIter.adoptInstead(iter); } if(U_FAILURE(*pErrorCode)) { return 0; } UnicodeString s(srcLength<0, src, srcLength); iter->setText(s); return ustrcase_mapWithOverlap( ustrcase_getCaseLocale(locale), 0, iter, dest, destCapacity, src, srcLength, ustrcase_internalToTitle, *pErrorCode); }
void CalendarLimitTest::TestLimitsThread(int32_t threadNum) { logln("thread %d starting", threadNum); int32_t testIndex = 0; LocalPointer<Calendar> cal; while (gTestCaseIterator.next(testIndex)) { TestCase &testCase = TestCases[testIndex]; logln("begin test of %s calendar.", testCase.type); UErrorCode status = U_ZERO_ERROR; char buf[64]; uprv_strcpy(buf, "root@calendar="); strcat(buf, testCase.type); cal.adoptInstead(Calendar::createInstance(buf, status)); if (failure(status, "Calendar::createInstance", TRUE)) { continue; } if (uprv_strcmp(cal->getType(), testCase.type) != 0) { errln((UnicodeString)"FAIL: Wrong calendar type: " + cal->getType() + " Requested: " + testCase.type); continue; } doTheoreticalLimitsTest(*(cal.getAlias()), testCase.hasLeapMonth); doLimitsTest(*(cal.getAlias()), testCase.actualTestStart, testCase.actualTestEnd); logln("end test of %s calendar.", testCase.type); } }
UBool ThreadSafeFormat::doStuff(int32_t offset, UnicodeString &appendErr, UErrorCode &status) const { UBool okay = TRUE; if(u_strcmp(fFormat->getCurrency(), kUSD)) { appendErr.append("fFormat currency != ") .append(kUSD) .append(", =") .append(fFormat->getCurrency()) .append("! "); okay = FALSE; } if(u_strcmp(gSharedData->fFormat->getCurrency(), kUSD)) { appendErr.append("gFormat currency != ") .append(kUSD) .append(", =") .append(gSharedData->fFormat->getCurrency()) .append("! "); okay = FALSE; } UnicodeString str; const UnicodeString *o=NULL; Formattable f; const NumberFormat *nf = NULL; // only operate on it as const. switch(offset%4) { case 0: f = gSharedData->fYDDThing; o = &gSharedData->fYDDStr; nf = gSharedData->fFormat.getAlias(); break; case 1: f = gSharedData->fBBDThing; o = &gSharedData->fBBDStr; nf = gSharedData->fFormat.getAlias(); break; case 2: f = gSharedData->fYDDThing; o = &gSharedData->fYDDStr; nf = fFormat.getAlias(); break; case 3: f = gSharedData->fBBDThing; o = &gSharedData->fBBDStr; nf = fFormat.getAlias(); break; } nf->format(f, str, NULL, status); if(*o != str) { appendErr.append(showDifference(*o, str)); okay = FALSE; } return okay; }
FormatThreadTest() // constructor is NOT multithread safe. : SimpleThread(), fNum(0), fTraceInfo(0), fTSF(NULL), fOffset(0) // the locale to use { UErrorCode status = U_ZERO_ERROR; // TODO: rearrange code to allow checking of status. fTSF.adoptInstead(new ThreadSafeFormat(status)); static int32_t fgOffset = 0; fgOffset += 3; fOffset = fgOffset; }
U_NAMESPACE_BEGIN int32_t CaseMap::toTitle( const char *locale, uint32_t options, BreakIterator *iter, const UChar *src, int32_t srcLength, UChar *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode) { LocalPointer<BreakIterator> ownedIter; if(iter==NULL) { iter=BreakIterator::createWordInstance(Locale(locale), errorCode); ownedIter.adoptInstead(iter); } if(U_FAILURE(errorCode)) { return 0; } UnicodeString s(srcLength<0, src, srcLength); iter->setText(s); return ustrcase_map( ustrcase_getCaseLocale(locale), options, iter, dest, destCapacity, src, srcLength, ustrcase_internalToTitle, edits, errorCode); }
void PluralRulesTest::checkSelect(const LocalPointer<PluralRules> &rules, UErrorCode &status, int32_t line, const char *keyword, ...) { // The varargs parameters are a const char* strings, each being a decimal number. // The formatting of the numbers as strings is significant, e.g. // the difference between "2" and "2.0" can affect which rule matches (which keyword is selected). // Note: rules parameter is a LocalPointer reference rather than a PluralRules * to avoid having // to write getAlias() at every (numerous) call site. if (U_FAILURE(status)) { errln("file %s, line %d, ICU error status: %s.", __FILE__, line, u_errorName(status)); status = U_ZERO_ERROR; return; } if (rules == NULL) { errln("file %s, line %d: rules pointer is NULL", __FILE__, line); return; } va_list ap; va_start(ap, keyword); for (;;) { const char *num = va_arg(ap, const char *); if (strcmp(num, END_MARK) == 0) { break; } // DigitList is a convenient way to parse the decimal number string and get a double. DigitList dl; dl.set(StringPiece(num), status); if (U_FAILURE(status)) { errln("file %s, line %d, ICU error status: %s.", __FILE__, line, u_errorName(status)); status = U_ZERO_ERROR; continue; } double numDbl = dl.getDouble(); const char *decimalPoint = strchr(num, '.'); int fractionDigitCount = decimalPoint == NULL ? 0 : (num + strlen(num) - 1) - decimalPoint; int fractionDigits = fractionDigitCount == 0 ? 0 : atoi(decimalPoint + 1); FixedDecimal ni(numDbl, fractionDigitCount, fractionDigits); UnicodeString actualKeyword = rules->select(ni); if (actualKeyword != UnicodeString(keyword)) { errln("file %s, line %d, select(%s) returned incorrect keyword. Expected %s, got %s", __FILE__, line, num, keyword, US(actualKeyword).cstr()); } } va_end(ap); }
U_CAPI int U_EXPORT2 writePackageDatFile(const char *outFilename, const char *outComment, const char *sourcePath, const char *addList, Package *pkg, char outType) { LocalPointer<Package> ownedPkg; LocalPointer<Package> addListPkg; if (pkg == NULL) { ownedPkg.adoptInstead(new Package); if(ownedPkg.isNull()) { fprintf(stderr, "icupkg: not enough memory\n"); return U_MEMORY_ALLOCATION_ERROR; } pkg = ownedPkg.getAlias(); addListPkg.adoptInstead(readList(sourcePath, addList, TRUE, NULL)); if(addListPkg.isValid()) { pkg->addItems(*addListPkg); } else { return U_ILLEGAL_ARGUMENT_ERROR; } } pkg->writePackage(outFilename, outType, outComment); return 0; }
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; }
/* -- text modifying -- */ virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); }
virtual void setText(const UnicodeString &text) { fDelegate->setText(text); }
/* Process a file */ void processFile(const char *filename, const char *cp, const char *inputDir, const char *outputDir, const char *packageName, SRBRoot *newPoolBundle, UBool omitBinaryCollation, UErrorCode &status) { LocalPointer<SRBRoot> data; UCHARBUF *ucbuf = NULL; char *rbname = NULL; char *openFileName = NULL; char *inputDirBuf = NULL; char outputFileName[256]; int32_t dirlen = 0; int32_t filelen = 0; if (U_FAILURE(status)) { return; } if(filename==NULL){ status=U_ILLEGAL_ARGUMENT_ERROR; return; }else{ filelen = (int32_t)uprv_strlen(filename); } if(inputDir == NULL) { const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR); openFileName = (char *) uprv_malloc(dirlen + filelen + 2); openFileName[0] = '\0'; if (filenameBegin != NULL) { /* * When a filename ../../../data/root.txt is specified, * we presume that the input directory is ../../../data * This is very important when the resource file includes * another file, like UCARules.txt or thaidict.brk. */ int32_t filenameSize = (int32_t)(filenameBegin - filename + 1); inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize); /* test for NULL */ if(inputDirBuf == NULL) { status = U_MEMORY_ALLOCATION_ERROR; goto finish; } inputDirBuf[filenameSize - 1] = 0; inputDir = inputDirBuf; dirlen = (int32_t)uprv_strlen(inputDir); } }else{ dirlen = (int32_t)uprv_strlen(inputDir); if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) { openFileName = (char *) uprv_malloc(dirlen + filelen + 2); /* test for NULL */ if(openFileName == NULL) { status = U_MEMORY_ALLOCATION_ERROR; goto finish; } openFileName[0] = '\0'; /* * append the input dir to openFileName if the first char in * filename is not file seperation char and the last char input directory is not '.'. * This is to support : * genrb -s. /home/icu/data * genrb -s. icu/data * The user cannot mix notations like * genrb -s. /icu/data --- the absolute path specified. -s redundant * user should use * genrb -s. icu/data --- start from CWD and look in icu/data dir */ if( (filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){ uprv_strcpy(openFileName, inputDir); openFileName[dirlen] = U_FILE_SEP_CHAR; } openFileName[dirlen + 1] = '\0'; } else { openFileName = (char *) uprv_malloc(dirlen + filelen + 1); /* test for NULL */ if(openFileName == NULL) { status = U_MEMORY_ALLOCATION_ERROR; goto finish; } uprv_strcpy(openFileName, inputDir); } } uprv_strcat(openFileName, filename); ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, &status); if(status == U_FILE_ACCESS_ERROR) { fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName); goto finish; } if (ucbuf == NULL || U_FAILURE(status)) { fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName, u_errorName(status)); goto finish; } /* auto detected popular encodings? */ if (cp!=NULL && isVerbose()) { printf("autodetected encoding %s\n", cp); } /* Parse the data into an SRBRoot */ data.adoptInstead(parse(ucbuf, inputDir, outputDir, filename, !omitBinaryCollation, options[NO_COLLATION_RULES].doesOccur, &status)); if (data.isNull() || U_FAILURE(status)) { fprintf(stderr, "couldn't parse the file %s. Error:%s\n", filename, u_errorName(status)); goto finish; } if(options[WRITE_POOL_BUNDLE].doesOccur) { data->fWritePoolBundle = newPoolBundle; data->compactKeys(status); int32_t newKeysLength; const char *newKeys = data->getKeyBytes(&newKeysLength); newPoolBundle->addKeyBytes(newKeys, newKeysLength, status); if(U_FAILURE(status)) { fprintf(stderr, "bundle_compactKeys(%s) or bundle_getKeyBytes() failed: %s\n", filename, u_errorName(status)); goto finish; } /* count the number of just-added key strings */ for(const char *newKeysLimit = newKeys + newKeysLength; newKeys < newKeysLimit; ++newKeys) { if(*newKeys == 0) { ++newPoolBundle->fKeysCount; } } } if(options[USE_POOL_BUNDLE].doesOccur) { data->fUsePoolBundle = &poolBundle; } /* Determine the target rb filename */ rbname = make_res_filename(filename, outputDir, packageName, status); if(U_FAILURE(status)) { fprintf(stderr, "couldn't make the res fileName for bundle %s. Error:%s\n", filename, u_errorName(status)); goto finish; } if(write_java== TRUE){ bundle_write_java(data.getAlias(), outputDir, outputEnc, outputFileName, sizeof(outputFileName), options[JAVA_PACKAGE].value, options[BUNDLE_NAME].value, &status); }else if(write_xliff ==TRUE){ bundle_write_xml(data.getAlias(), outputDir, outputEnc, filename, outputFileName, sizeof(outputFileName), language, xliffOutputFileName, &status); }else{ /* Write the data to the file */ data->write(outputDir, packageName, outputFileName, sizeof(outputFileName), status); } if (U_FAILURE(status)) { fprintf(stderr, "couldn't write bundle %s. Error:%s\n", outputFileName, u_errorName(status)); } finish: if (inputDirBuf != NULL) { uprv_free(inputDirBuf); } if (openFileName != NULL) { uprv_free(openFileName); } if(ucbuf) { ucbuf_close(ucbuf); } if (rbname) { uprv_free(rbname); } }
NumberFormat* NumberFormat::makeInstance(const Locale& desiredLocale, UNumberFormatStyle style, UBool mustBeDecimalFormat, UErrorCode& status) { if (U_FAILURE(status)) return NULL; if (style < 0 || style >= UNUM_FORMAT_STYLE_COUNT) { status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; } // Some styles are not supported. This is a result of merging // the @draft ICU 4.2 NumberFormat::EStyles into the long-existing UNumberFormatStyle. // Ticket #8503 is for reviewing/fixing/merging the two relevant implementations: // this one and unum_open(). // The UNUM_PATTERN_ styles are not supported here // because this method does not take a pattern string. if (!isStyleSupported(style)) { status = U_UNSUPPORTED_ERROR; return NULL; } #if U_PLATFORM_USES_ONLY_WIN32_API if (!mustBeDecimalFormat) { char buffer[8]; int32_t count = desiredLocale.getKeywordValue("compat", buffer, sizeof(buffer), status); // if the locale has "@compat=host", create a host-specific NumberFormat if (U_SUCCESS(status) && count > 0 && uprv_strcmp(buffer, "host") == 0) { Win32NumberFormat *f = NULL; UBool curr = TRUE; switch (style) { case UNUM_DECIMAL: curr = FALSE; // fall-through case UNUM_CURRENCY: case UNUM_CURRENCY_ISO: // do not support plural formatting here case UNUM_CURRENCY_PLURAL: f = new Win32NumberFormat(desiredLocale, curr, status); if (U_SUCCESS(status)) { return f; } delete f; break; default: break; } } } #endif // Use numbering system cache hashtable umtx_initOnce(gNSCacheInitOnce, &nscacheInit); // Get cached numbering system LocalPointer<NumberingSystem> ownedNs; NumberingSystem *ns = NULL; if (NumberingSystem_cache != NULL) { // TODO: Bad hash key usage, see ticket #8504. int32_t hashKey = desiredLocale.hashCode(); Mutex lock(&nscacheMutex); ns = (NumberingSystem *)uhash_iget(NumberingSystem_cache, hashKey); if (ns == NULL) { ns = NumberingSystem::createInstance(desiredLocale,status); uhash_iput(NumberingSystem_cache, hashKey, (void*)ns, &status); } } else { ownedNs.adoptInstead(NumberingSystem::createInstance(desiredLocale,status)); ns = ownedNs.getAlias(); } // check results of getting a numbering system if (U_FAILURE(status)) { return NULL; } if (mustBeDecimalFormat && ns->isAlgorithmic()) { status = U_UNSUPPORTED_ERROR; return NULL; } LocalPointer<DecimalFormatSymbols> symbolsToAdopt; UnicodeString pattern; LocalUResourceBundlePointer ownedResource(ures_open(NULL, desiredLocale.getName(), &status)); if (U_FAILURE(status)) { // We don't appear to have resource data available -- use the last-resort data status = U_USING_FALLBACK_WARNING; // When the data is unavailable, and locale isn't passed in, last resort data is used. symbolsToAdopt.adoptInstead(new DecimalFormatSymbols(status)); if (symbolsToAdopt.isNull()) { status = U_MEMORY_ALLOCATION_ERROR; return NULL; } // Creates a DecimalFormat instance with the last resort number patterns. pattern.setTo(TRUE, gLastResortNumberPatterns[style], -1); } else { // Loads the decimal symbols of the desired locale. symbolsToAdopt.adoptInstead(new DecimalFormatSymbols(desiredLocale, status)); if (symbolsToAdopt.isNull()) { status = U_MEMORY_ALLOCATION_ERROR; return NULL; } UResourceBundle *resource = ownedResource.orphan(); UResourceBundle *numElements = ures_getByKeyWithFallback(resource, gNumberElements, NULL, &status); resource = ures_getByKeyWithFallback(numElements, ns->getName(), resource, &status); resource = ures_getByKeyWithFallback(resource, gPatterns, resource, &status); ownedResource.adoptInstead(resource); int32_t patLen = 0; const UChar *patResStr = ures_getStringByKeyWithFallback(resource, gFormatKeys[style], &patLen, &status); // Didn't find a pattern specific to the numbering system, so fall back to "latn" if ( status == U_MISSING_RESOURCE_ERROR && uprv_strcmp(gLatn,ns->getName())) { status = U_ZERO_ERROR; resource = ures_getByKeyWithFallback(numElements, gLatn, resource, &status); resource = ures_getByKeyWithFallback(resource, gPatterns, resource, &status); patResStr = ures_getStringByKeyWithFallback(resource, gFormatKeys[style], &patLen, &status); } ures_close(numElements); // Creates the specified decimal format style of the desired locale. pattern.setTo(TRUE, patResStr, patLen); } if (U_FAILURE(status)) { return NULL; } if(style==UNUM_CURRENCY || style == UNUM_CURRENCY_ISO){ const UChar* currPattern = symbolsToAdopt->getCurrencyPattern(); if(currPattern!=NULL){ pattern.setTo(currPattern, u_strlen(currPattern)); } } NumberFormat *f; if (ns->isAlgorithmic()) { UnicodeString nsDesc; UnicodeString nsRuleSetGroup; UnicodeString nsRuleSetName; Locale nsLoc; URBNFRuleSetTag desiredRulesType = URBNF_NUMBERING_SYSTEM; nsDesc.setTo(ns->getDescription()); int32_t firstSlash = nsDesc.indexOf(gSlash); int32_t lastSlash = nsDesc.lastIndexOf(gSlash); if ( lastSlash > firstSlash ) { CharString nsLocID; nsLocID.appendInvariantChars(nsDesc.tempSubString(0, firstSlash), status); nsRuleSetGroup.setTo(nsDesc,firstSlash+1,lastSlash-firstSlash-1); nsRuleSetName.setTo(nsDesc,lastSlash+1); nsLoc = Locale::createFromName(nsLocID.data()); UnicodeString SpelloutRules = UNICODE_STRING_SIMPLE("SpelloutRules"); if ( nsRuleSetGroup.compare(SpelloutRules) == 0 ) { desiredRulesType = URBNF_SPELLOUT; } } else { nsLoc = desiredLocale; nsRuleSetName.setTo(nsDesc); } RuleBasedNumberFormat *r = new RuleBasedNumberFormat(desiredRulesType,nsLoc,status); if (r == NULL) { status = U_MEMORY_ALLOCATION_ERROR; return NULL; } r->setDefaultRuleSet(nsRuleSetName,status); f = r; } else { // replace single currency sign in the pattern with double currency sign // if the style is UNUM_CURRENCY_ISO if (style == UNUM_CURRENCY_ISO) { pattern.findAndReplace(UnicodeString(TRUE, gSingleCurrencySign, 1), UnicodeString(TRUE, gDoubleCurrencySign, 2)); } // "new DecimalFormat()" does not adopt the symbols if its memory allocation fails. DecimalFormatSymbols *syms = symbolsToAdopt.orphan(); f = new DecimalFormat(pattern, syms, style, status); if (f == NULL) { delete syms; status = U_MEMORY_ALLOCATION_ERROR; return NULL; } } f->setLocaleIDs(ures_getLocaleByType(ownedResource.getAlias(), ULOC_VALID_LOCALE, &status), ures_getLocaleByType(ownedResource.getAlias(), ULOC_ACTUAL_LOCALE, &status)); if (U_FAILURE(status)) { delete f; return NULL; } return f; }
virtual UBool isBoundary(int32_t offset) { return fDelegate->isBoundary(offset); }
virtual int32_t previous(void) { return fDelegate->previous(); }
virtual int32_t preceding(int32_t offset) { return fDelegate->preceding(offset); }
/* -- ITERATION -- */ virtual int32_t first(void) { return fDelegate->first(); }
virtual CharacterIterator& getText(void) const { return fDelegate->getText(); }
/* -- other functions that are just delegated -- */ virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); }
virtual int32_t current(void) const { return fDelegate->current(); }
virtual int32_t next(int32_t n) { return fDelegate->next(n); }
/* * Initializes the region data from the ICU resource bundles. The region data * contains the basic relationships such as which regions are known, what the numeric * codes are, any known aliases, and the territory containment data. * * If the region data has already loaded, then this method simply returns without doing * anything meaningful. */ void Region::loadRegionData(UErrorCode &status) { // Construct service objs first LocalUHashtablePointer newRegionIDMap(uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status)); LocalUHashtablePointer newNumericCodeMap(uhash_open(uhash_hashLong,uhash_compareLong,NULL,&status)); LocalUHashtablePointer newRegionAliases(uhash_open(uhash_hashUnicodeString,uhash_compareUnicodeString,NULL,&status)); LocalPointer<DecimalFormat> df(new DecimalFormat(status), status); LocalPointer<UVector> continents(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); LocalPointer<UVector> groupings(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); allRegions = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); LocalUResourceBundlePointer metadata(ures_openDirect(NULL,"metadata",&status)); LocalUResourceBundlePointer metadataAlias(ures_getByKey(metadata.getAlias(),"alias",NULL,&status)); LocalUResourceBundlePointer territoryAlias(ures_getByKey(metadataAlias.getAlias(),"territory",NULL,&status)); LocalUResourceBundlePointer supplementalData(ures_openDirect(NULL,"supplementalData",&status)); LocalUResourceBundlePointer codeMappings(ures_getByKey(supplementalData.getAlias(),"codeMappings",NULL,&status)); LocalUResourceBundlePointer idValidity(ures_getByKey(supplementalData.getAlias(),"idValidity",NULL,&status)); LocalUResourceBundlePointer regionList(ures_getByKey(idValidity.getAlias(),"region",NULL,&status)); LocalUResourceBundlePointer regionRegular(ures_getByKey(regionList.getAlias(),"regular",NULL,&status)); LocalUResourceBundlePointer regionMacro(ures_getByKey(regionList.getAlias(),"macroregion",NULL,&status)); LocalUResourceBundlePointer regionUnknown(ures_getByKey(regionList.getAlias(),"unknown",NULL,&status)); LocalUResourceBundlePointer territoryContainment(ures_getByKey(supplementalData.getAlias(),"territoryContainment",NULL,&status)); LocalUResourceBundlePointer worldContainment(ures_getByKey(territoryContainment.getAlias(),"001",NULL,&status)); LocalUResourceBundlePointer groupingContainment(ures_getByKey(territoryContainment.getAlias(),"grouping",NULL,&status)); if (U_FAILURE(status)) { return; } // now, initialize df->setParseIntegerOnly(TRUE); uhash_setValueDeleter(newRegionIDMap.getAlias(), deleteRegion); // regionIDMap owns objs uhash_setKeyDeleter(newRegionAliases.getAlias(), uprv_deleteUObject); // regionAliases owns the string keys while ( ures_hasNext(regionRegular.getAlias()) ) { UnicodeString regionName = ures_getNextUnicodeString(regionRegular.getAlias(),NULL,&status); int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER); UChar buf[6]; regionName.extract(buf,6,status); if ( rangeMarkerLocation > 0 ) { UChar endRange = regionName.charAt(rangeMarkerLocation+1); buf[rangeMarkerLocation] = 0; while ( buf[rangeMarkerLocation-1] <= endRange ) { LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status); allRegions->addElement(newRegion.orphan(),status); buf[rangeMarkerLocation-1]++; } } else { LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status); allRegions->addElement(newRegion.orphan(),status); } } while ( ures_hasNext(regionMacro.getAlias()) ) { UnicodeString regionName = ures_getNextUnicodeString(regionMacro.getAlias(),NULL,&status); int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER); UChar buf[6]; regionName.extract(buf,6,status); if ( rangeMarkerLocation > 0 ) { UChar endRange = regionName.charAt(rangeMarkerLocation+1); buf[rangeMarkerLocation] = 0; while ( buf[rangeMarkerLocation-1] <= endRange ) { LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status); allRegions->addElement(newRegion.orphan(),status); buf[rangeMarkerLocation-1]++; } } else { LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status); allRegions->addElement(newRegion.orphan(),status); } } while ( ures_hasNext(regionUnknown.getAlias()) ) { LocalPointer<UnicodeString> regionName (new UnicodeString(ures_getNextUnicodeString(regionUnknown.getAlias(),NULL,&status),status)); allRegions->addElement(regionName.orphan(),status); } while ( ures_hasNext(worldContainment.getAlias()) ) { UnicodeString *continentName = new UnicodeString(ures_getNextUnicodeString(worldContainment.getAlias(),NULL,&status)); continents->addElement(continentName,status); } while ( ures_hasNext(groupingContainment.getAlias()) ) { UnicodeString *groupingName = new UnicodeString(ures_getNextUnicodeString(groupingContainment.getAlias(),NULL,&status)); groupings->addElement(groupingName,status); } for ( int32_t i = 0 ; i < allRegions->size() ; i++ ) { LocalPointer<Region> r(new Region(), status); if ( U_FAILURE(status) ) { return; } UnicodeString *regionName = (UnicodeString *)allRegions->elementAt(i); r->idStr = *regionName; r->idStr.extract(0,r->idStr.length(),r->id,sizeof(r->id),US_INV); r->type = URGN_TERRITORY; // Only temporary - figure out the real type later once the aliases are known. Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(r->idStr,result,ps); if ( U_SUCCESS(ps) ) { r->code = result.getLong(); // Convert string to number uhash_iput(newNumericCodeMap.getAlias(),r->code,(void *)(r.getAlias()),&status); r->type = URGN_SUBCONTINENT; } else { r->code = -1; } void* idStrAlias = (void*)&(r->idStr); // about to orphan 'r'. Save this off. uhash_put(newRegionIDMap.getAlias(),idStrAlias,(void *)(r.orphan()),&status); // regionIDMap takes ownership } // Process the territory aliases while ( ures_hasNext(territoryAlias.getAlias()) ) { LocalUResourceBundlePointer res(ures_getNextResource(territoryAlias.getAlias(),NULL,&status)); const char *aliasFrom = ures_getKey(res.getAlias()); LocalPointer<UnicodeString> aliasFromStr(new UnicodeString(aliasFrom, -1, US_INV), status); UnicodeString aliasTo = ures_getUnicodeStringByKey(res.getAlias(),"replacement",&status); res.adoptInstead(NULL); const Region *aliasToRegion = (Region *) uhash_get(newRegionIDMap.getAlias(),&aliasTo); Region *aliasFromRegion = (Region *)uhash_get(newRegionIDMap.getAlias(),aliasFromStr.getAlias()); if ( aliasToRegion != NULL && aliasFromRegion == NULL ) { // This is just an alias from some string to a region uhash_put(newRegionAliases.getAlias(),(void *)aliasFromStr.orphan(), (void *)aliasToRegion,&status); } else { if ( aliasFromRegion == NULL ) { // Deprecated region code not in the master codes list - so need to create a deprecated region for it. LocalPointer<Region> newRgn(new Region, status); if ( U_SUCCESS(status) ) { aliasFromRegion = newRgn.orphan(); } else { return; // error out } aliasFromRegion->idStr.setTo(*aliasFromStr); aliasFromRegion->idStr.extract(0,aliasFromRegion->idStr.length(),aliasFromRegion->id,sizeof(aliasFromRegion->id),US_INV); uhash_put(newRegionIDMap.getAlias(),(void *)&(aliasFromRegion->idStr),(void *)aliasFromRegion,&status); Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(aliasFromRegion->idStr,result,ps); if ( U_SUCCESS(ps) ) { aliasFromRegion->code = result.getLong(); // Convert string to number uhash_iput(newNumericCodeMap.getAlias(),aliasFromRegion->code,(void *)aliasFromRegion,&status); } else { aliasFromRegion->code = -1; } aliasFromRegion->type = URGN_DEPRECATED; } else { aliasFromRegion->type = URGN_DEPRECATED; } { LocalPointer<UVector> newPreferredValues(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); aliasFromRegion->preferredValues = newPreferredValues.orphan(); } if( U_FAILURE(status)) { return; } UnicodeString currentRegion; //currentRegion.remove(); TODO: was already 0 length? for (int32_t i = 0 ; i < aliasTo.length() ; i++ ) { if ( aliasTo.charAt(i) != 0x0020 ) { currentRegion.append(aliasTo.charAt(i)); } if ( aliasTo.charAt(i) == 0x0020 || i+1 == aliasTo.length() ) { Region *target = (Region *)uhash_get(newRegionIDMap.getAlias(),(void *)¤tRegion); if (target) { LocalPointer<UnicodeString> preferredValue(new UnicodeString(target->idStr), status); aliasFromRegion->preferredValues->addElement((void *)preferredValue.orphan(),status); // may add null if err } currentRegion.remove(); } } } } // Process the code mappings - This will allow us to assign numeric codes to most of the territories. while ( ures_hasNext(codeMappings.getAlias()) ) { UResourceBundle *mapping = ures_getNextResource(codeMappings.getAlias(),NULL,&status); if ( ures_getType(mapping) == URES_ARRAY && ures_getSize(mapping) == 3) { UnicodeString codeMappingID = ures_getUnicodeStringByIndex(mapping,0,&status); UnicodeString codeMappingNumber = ures_getUnicodeStringByIndex(mapping,1,&status); UnicodeString codeMapping3Letter = ures_getUnicodeStringByIndex(mapping,2,&status); Region *r = (Region *)uhash_get(newRegionIDMap.getAlias(),(void *)&codeMappingID); if ( r ) { Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(codeMappingNumber,result,ps); if ( U_SUCCESS(ps) ) { r->code = result.getLong(); // Convert string to number uhash_iput(newNumericCodeMap.getAlias(),r->code,(void *)r,&status); } LocalPointer<UnicodeString> code3(new UnicodeString(codeMapping3Letter), status); uhash_put(newRegionAliases.getAlias(),(void *)code3.orphan(), (void *)r,&status); } } ures_close(mapping); } // Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS Region *r; UnicodeString WORLD_ID_STRING(WORLD_ID); r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&WORLD_ID_STRING); if ( r ) { r->type = URGN_WORLD; } UnicodeString UNKNOWN_REGION_ID_STRING(UNKNOWN_REGION_ID); r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&UNKNOWN_REGION_ID_STRING); if ( r ) { r->type = URGN_UNKNOWN; } for ( int32_t i = 0 ; i < continents->size() ; i++ ) { r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)continents->elementAt(i)); if ( r ) { r->type = URGN_CONTINENT; } } for ( int32_t i = 0 ; i < groupings->size() ; i++ ) { r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)groupings->elementAt(i)); if ( r ) { r->type = URGN_GROUPING; } } // Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR // even though it looks like a territory code. Need to handle it here. UnicodeString OUTLYING_OCEANIA_REGION_ID_STRING(OUTLYING_OCEANIA_REGION_ID); r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&OUTLYING_OCEANIA_REGION_ID_STRING); if ( r ) { r->type = URGN_SUBCONTINENT; } // Load territory containment info from the supplemental data. while ( ures_hasNext(territoryContainment.getAlias()) ) { LocalUResourceBundlePointer mapping(ures_getNextResource(territoryContainment.getAlias(),NULL,&status)); if( U_FAILURE(status) ) { return; // error out } const char *parent = ures_getKey(mapping.getAlias()); if (uprv_strcmp(parent, "containedGroupings") == 0 || uprv_strcmp(parent, "deprecated") == 0) { continue; // handle new pseudo-parent types added in ICU data per cldrbug 7808; for now just skip. // #11232 is to do something useful with these. } UnicodeString parentStr = UnicodeString(parent, -1 , US_INV); Region *parentRegion = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&parentStr); for ( int j = 0 ; j < ures_getSize(mapping.getAlias()); j++ ) { UnicodeString child = ures_getUnicodeStringByIndex(mapping.getAlias(),j,&status); Region *childRegion = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&child); if ( parentRegion != NULL && childRegion != NULL ) { // Add the child region to the set of regions contained by the parent if (parentRegion->containedRegions == NULL) { parentRegion->containedRegions = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); } LocalPointer<UnicodeString> childStr(new UnicodeString(), status); if( U_FAILURE(status) ) { return; // error out } childStr->fastCopyFrom(childRegion->idStr); parentRegion->containedRegions->addElement((void *)childStr.orphan(),status); // Set the parent region to be the containing region of the child. // Regions of type GROUPING can't be set as the parent, since another region // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent. if ( parentRegion->type != URGN_GROUPING) { childRegion->containingRegion = parentRegion; } } } } // Create the availableRegions lists int32_t pos = UHASH_FIRST; while ( const UHashElement* element = uhash_nextElement(newRegionIDMap.getAlias(),&pos)) { Region *ar = (Region *)element->value.pointer; if ( availableRegions[ar->type] == NULL ) { LocalPointer<UVector> newAr(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); availableRegions[ar->type] = newAr.orphan(); } LocalPointer<UnicodeString> arString(new UnicodeString(ar->idStr), status); if( U_FAILURE(status) ) { return; // error out } availableRegions[ar->type]->addElement((void *)arString.orphan(),status); } ucln_i18n_registerCleanup(UCLN_I18N_REGION, region_cleanup); // copy hashtables numericCodeMap = newNumericCodeMap.orphan(); regionIDMap = newRegionIDMap.orphan(); regionAliases = newRegionAliases.orphan(); }
virtual void run() { fTraceInfo = 1; LocalPointer<NumberFormat> percentFormatter; UErrorCode status = U_ZERO_ERROR; #if 0 // debugging code, for (int i=0; i<4000; i++) { status = U_ZERO_ERROR; UDataMemory *data1 = udata_openChoice(0, "res", "en_US", isAcceptable, 0, &status); UDataMemory *data2 = udata_openChoice(0, "res", "fr", isAcceptable, 0, &status); udata_close(data1); udata_close(data2); if (U_FAILURE(status)) { error("udata_openChoice failed.\n"); break; } } return; #endif #if 0 // debugging code, int m; for (m=0; m<4000; m++) { status = U_ZERO_ERROR; UResourceBundle *res = NULL; const char *localeName = NULL; Locale loc = Locale::getEnglish(); localeName = loc.getName(); // localeName = "en"; // ResourceBundle bund = ResourceBundle(0, loc, status); //umtx_lock(&gDebugMutex); res = ures_open(NULL, localeName, &status); //umtx_unlock(&gDebugMutex); //umtx_lock(&gDebugMutex); ures_close(res); //umtx_unlock(&gDebugMutex); if (U_FAILURE(status)) { error("Resource bundle construction failed.\n"); break; } } return; #endif // Keep this data here to avoid static initialization. FormatThreadTestData kNumberFormatTestData[] = { FormatThreadTestData((double)5.0, UnicodeString("5", "")), FormatThreadTestData( 6.0, UnicodeString("6", "")), FormatThreadTestData( 20.0, UnicodeString("20", "")), FormatThreadTestData( 8.0, UnicodeString("8", "")), FormatThreadTestData( 8.3, UnicodeString("8.3", "")), FormatThreadTestData( 12345, UnicodeString("12,345", "")), FormatThreadTestData( 81890.23, UnicodeString("81,890.23", "")), }; int32_t kNumberFormatTestDataLength = (int32_t)(sizeof(kNumberFormatTestData) / sizeof(kNumberFormatTestData[0])); // Keep this data here to avoid static initialization. FormatThreadTestData kPercentFormatTestData[] = { FormatThreadTestData((double)5.0, CharsToUnicodeString("500\\u00a0%")), FormatThreadTestData( 1.0, CharsToUnicodeString("100\\u00a0%")), FormatThreadTestData( 0.26, CharsToUnicodeString("26\\u00a0%")), FormatThreadTestData( 16384.99, CharsToUnicodeString("1\\u00a0638\\u00a0499\\u00a0%")), // U+00a0 = NBSP FormatThreadTestData( 81890.23, CharsToUnicodeString("8\\u00a0189\\u00a0023\\u00a0%")), }; int32_t kPercentFormatTestDataLength = (int32_t)(sizeof(kPercentFormatTestData) / sizeof(kPercentFormatTestData[0])); int32_t iteration; status = U_ZERO_ERROR; LocalPointer<NumberFormat> formatter(NumberFormat::createInstance(Locale::getEnglish(),status)); if(U_FAILURE(status)) { error("Error on NumberFormat::createInstance()."); goto cleanupAndReturn; } percentFormatter.adoptInstead(NumberFormat::createPercentInstance(Locale::getFrench(),status)); if(U_FAILURE(status)) { error("Error on NumberFormat::createPercentInstance()."); goto cleanupAndReturn; } for(iteration = 0;!getError() && iteration<kFormatThreadIterations;iteration++) { int32_t whichLine = (iteration + fOffset)%kNumberFormatTestDataLength; UnicodeString output; formatter->format(kNumberFormatTestData[whichLine].number, output); if(0 != output.compare(kNumberFormatTestData[whichLine].string)) { error("format().. expected " + kNumberFormatTestData[whichLine].string + " got " + output); goto cleanupAndReturn; } // Now check percent. output.remove(); whichLine = (iteration + fOffset)%kPercentFormatTestDataLength; percentFormatter->format(kPercentFormatTestData[whichLine].number, output); if(0 != output.compare(kPercentFormatTestData[whichLine].string)) { error("percent format().. \n" + showDifference(kPercentFormatTestData[whichLine].string,output)); goto cleanupAndReturn; } // Test message error const int kNumberOfMessageTests = 3; UErrorCode statusToCheck; UnicodeString patternToCheck; Locale messageLocale; Locale countryToCheck; double currencyToCheck; UnicodeString expected; // load the cases. switch((iteration+fOffset) % kNumberOfMessageTests) { default: case 0: statusToCheck= U_FILE_ACCESS_ERROR; patternToCheck= "0:Someone from {2} is receiving a #{0}" " error - {1}. Their telephone call is costing " "{3,number,currency}."; // number,currency messageLocale= Locale("en","US"); countryToCheck= Locale("","HR"); currencyToCheck= 8192.77; expected= "0:Someone from Croatia is receiving a #4 error - " "U_FILE_ACCESS_ERROR. Their telephone call is costing $8,192.77."; break; case 1: statusToCheck= U_INDEX_OUTOFBOUNDS_ERROR; patternToCheck= "1:A customer in {2} is receiving a #{0} error - {1}. Their telephone call is costing {3,number,currency}."; // number,currency messageLocale= Locale("de","DE@currency=DEM"); countryToCheck= Locale("","BF"); currencyToCheck= 2.32; expected= CharsToUnicodeString( "1:A customer in Burkina Faso is receiving a #8 error - U_INDEX_OUTOFBOUNDS_ERROR. Their telephone call is costing 2,32\\u00A0DM."); break; case 2: statusToCheck= U_MEMORY_ALLOCATION_ERROR; patternToCheck= "2:user in {2} is receiving a #{0} error - {1}. " "They insist they just spent {3,number,currency} " "on memory."; // number,currency messageLocale= Locale("de","AT@currency=ATS"); // Austrian German countryToCheck= Locale("","US"); // hmm currencyToCheck= 40193.12; expected= CharsToUnicodeString( "2:user in Vereinigte Staaten is receiving a #7 error" " - U_MEMORY_ALLOCATION_ERROR. They insist they just spent" " \\u00f6S\\u00A040.193,12 on memory."); break; } UnicodeString result; UErrorCode status = U_ZERO_ERROR; formatErrorMessage(status,patternToCheck,messageLocale,statusToCheck, countryToCheck,currencyToCheck,result); if(U_FAILURE(status)) { UnicodeString tmp(u_errorName(status)); error("Failure on message format, pattern=" + patternToCheck + ", error = " + tmp); goto cleanupAndReturn; } if(result != expected) { error("PatternFormat: \n" + showDifference(expected,result)); goto cleanupAndReturn; } } /* end of for loop */ cleanupAndReturn: // while (fNum == 4) {SimpleThread::sleep(10000);} // Force a failure by preventing thread from finishing fTraceInfo = 2; }
int main(int argc, char* argv[]) { UErrorCode status = U_ZERO_ERROR; const char *arg = NULL; const char *outputDir = NULL; /* NULL = no output directory, use current */ const char *inputDir = NULL; const char *encoding = ""; int i; UBool illegalArg = FALSE; U_MAIN_INIT_ARGS(argc, argv); options[JAVA_PACKAGE].value = "com.ibm.icu.impl.data"; options[BUNDLE_NAME].value = "LocaleElements"; argc = u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options); /* error handling, printing usage message */ if(argc<0) { fprintf(stderr, "%s: error in command line argument \"%s\"\n", argv[0], argv[-argc]); illegalArg = TRUE; } else if(argc<2) { illegalArg = TRUE; } if(options[WRITE_POOL_BUNDLE].doesOccur && options[USE_POOL_BUNDLE].doesOccur) { fprintf(stderr, "%s: cannot combine --writePoolBundle and --usePoolBundle\n", argv[0]); illegalArg = TRUE; } if(options[FORMAT_VERSION].doesOccur) { const char *s = options[FORMAT_VERSION].value; if(uprv_strlen(s) != 1 || (s[0] < '1' && '3' < s[0])) { fprintf(stderr, "%s: unsupported --formatVersion %s\n", argv[0], s); illegalArg = TRUE; } else if(s[0] == '1' && (options[WRITE_POOL_BUNDLE].doesOccur || options[USE_POOL_BUNDLE].doesOccur) ) { fprintf(stderr, "%s: cannot combine --formatVersion 1 with --writePoolBundle or --usePoolBundle\n", argv[0]); illegalArg = TRUE; } else { setFormatVersion(s[0] - '0'); } } if((options[JAVA_PACKAGE].doesOccur || options[BUNDLE_NAME].doesOccur) && !options[WRITE_JAVA].doesOccur) { fprintf(stderr, "%s error: command line argument --java-package or --bundle-name " "without --write-java\n", argv[0]); illegalArg = TRUE; } if(options[VERSION].doesOccur) { fprintf(stderr, "%s version %s (ICU version %s).\n" "%s\n", argv[0], GENRB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING); if(!illegalArg) { return U_ZERO_ERROR; } } if(illegalArg || options[HELP1].doesOccur || options[HELP2].doesOccur) { /* * Broken into chunks because the C89 standard says the minimum * required supported string length is 509 bytes. */ fprintf(stderr, "Usage: %s [OPTIONS] [FILES]\n" "\tReads the list of resource bundle source files and creates\n" "\tbinary version of resource bundles (.res files)\n", argv[0]); fprintf(stderr, "Options:\n" "\t-h or -? or --help this usage text\n" "\t-q or --quiet do not display warnings\n" "\t-v or --verbose print extra information when processing files\n" "\t-V or --version prints out version number and exits\n" "\t-c or --copyright include copyright notice\n"); fprintf(stderr, "\t-e or --encoding encoding of source files\n" "\t-d of --destdir destination directory, followed by the path, defaults to %s\n" "\t-s or --sourcedir source directory for files followed by path, defaults to %s\n" "\t-i or --icudatadir directory for locating any needed intermediate data files,\n" "\t followed by path, defaults to %s\n", u_getDataDirectory(), u_getDataDirectory(), u_getDataDirectory()); fprintf(stderr, "\t-j or --write-java write a Java ListResourceBundle for ICU4J, followed by optional encoding\n" "\t defaults to ASCII and \\uXXXX format.\n" "\t --java-package For --write-java: package name for writing the ListResourceBundle,\n" "\t defaults to com.ibm.icu.impl.data\n"); fprintf(stderr, "\t-b or --bundle-name For --write-java: root resource bundle name for writing the ListResourceBundle,\n" "\t defaults to LocaleElements\n" "\t-x or --write-xliff write an XLIFF file for the resource bundle. Followed by\n" "\t an optional output file name.\n" "\t-k or --strict use pedantic parsing of syntax\n" /*added by Jing*/ "\t-l or --language for XLIFF: language code compliant with BCP 47.\n"); fprintf(stderr, "\t-C or --noBinaryCollation do not generate binary collation image;\n" "\t makes .res file smaller but collator instantiation much slower;\n" "\t maintains ability to get tailoring rules\n" "\t-R or --omitCollationRules do not include collation (tailoring) rules;\n" "\t makes .res file smaller and maintains collator instantiation speed\n" "\t but tailoring rules will not be available (they are rarely used)\n"); fprintf(stderr, "\t --formatVersion write a .res file compatible with the requested formatVersion (single digit);\n" "\t for example, --formatVersion 1\n"); fprintf(stderr, "\t --writePoolBundle write a pool.res file with all of the keys of all input bundles\n" "\t --usePoolBundle [path-to-pool.res] point to keys from the pool.res keys pool bundle if they are available there;\n" "\t makes .res files smaller but dependent on the pool bundle\n" "\t (--writePoolBundle and --usePoolBundle cannot be combined)\n"); return illegalArg ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } if(options[VERBOSE].doesOccur) { setVerbose(TRUE); } if(options[QUIET].doesOccur) { setShowWarning(FALSE); } if(options[STRICT].doesOccur) { setStrict(TRUE); } if(options[COPYRIGHT].doesOccur){ setIncludeCopyright(TRUE); } if(options[SOURCEDIR].doesOccur) { inputDir = options[SOURCEDIR].value; } if(options[DESTDIR].doesOccur) { outputDir = options[DESTDIR].value; } if(options[ENCODING].doesOccur) { encoding = options[ENCODING].value; } if(options[ICUDATADIR].doesOccur) { u_setDataDirectory(options[ICUDATADIR].value); } /* Initialize ICU */ u_init(&status); if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) { /* Note: u_init() will try to open ICU property data. * failures here are expected when building ICU from scratch. * ignore them. */ fprintf(stderr, "%s: can not initialize ICU. status = %s\n", argv[0], u_errorName(status)); exit(1); } status = U_ZERO_ERROR; if(options[WRITE_JAVA].doesOccur) { write_java = TRUE; outputEnc = options[WRITE_JAVA].value; } if(options[WRITE_XLIFF].doesOccur) { write_xliff = TRUE; if(options[WRITE_XLIFF].value != NULL){ xliffOutputFileName = options[WRITE_XLIFF].value; } } initParser(); /*added by Jing*/ if(options[LANGUAGE].doesOccur) { language = options[LANGUAGE].value; } LocalPointer<SRBRoot> newPoolBundle; if(options[WRITE_POOL_BUNDLE].doesOccur) { newPoolBundle.adoptInsteadAndCheckErrorCode(new SRBRoot(NULL, TRUE, status), status); if(U_FAILURE(status)) { fprintf(stderr, "unable to create an empty bundle for the pool keys: %s\n", u_errorName(status)); return status; } else { const char *poolResName = "pool.res"; char *nameWithoutSuffix = static_cast<char *>(uprv_malloc(uprv_strlen(poolResName) + 1)); if (nameWithoutSuffix == NULL) { fprintf(stderr, "out of memory error\n"); return U_MEMORY_ALLOCATION_ERROR; } uprv_strcpy(nameWithoutSuffix, poolResName); *uprv_strrchr(nameWithoutSuffix, '.') = 0; newPoolBundle->fLocale = nameWithoutSuffix; } } if(options[USE_POOL_BUNDLE].doesOccur) { const char *poolResName = "pool.res"; FileStream *poolFile; int32_t poolFileSize; int32_t indexLength; /* * TODO: Consolidate inputDir/filename handling from main() and processFile() * into a common function, and use it here as well. * Try to create toolutil functions for dealing with dir/filenames and * loading ICU data files without udata_open(). * Share code with icupkg? * Also, make_res_filename() seems to be unused. Review and remove. */ CharString poolFileName; if (options[USE_POOL_BUNDLE].value!=NULL) { poolFileName.append(options[USE_POOL_BUNDLE].value, status); } else if (inputDir) { poolFileName.append(inputDir, status); } poolFileName.appendPathPart(poolResName, status); if (U_FAILURE(status)) { return status; } poolFile = T_FileStream_open(poolFileName.data(), "rb"); if (poolFile == NULL) { fprintf(stderr, "unable to open pool bundle file %s\n", poolFileName.data()); return 1; } poolFileSize = T_FileStream_size(poolFile); if (poolFileSize < 32) { fprintf(stderr, "the pool bundle file %s is too small\n", poolFileName.data()); return 1; } poolBundle.fBytes = new uint8_t[(poolFileSize + 15) & ~15]; if (poolFileSize > 0 && poolBundle.fBytes == NULL) { fprintf(stderr, "unable to allocate memory for the pool bundle file %s\n", poolFileName.data()); return U_MEMORY_ALLOCATION_ERROR; } UDataSwapper *ds; const DataHeader *header; int32_t bytesRead = T_FileStream_read(poolFile, poolBundle.fBytes, poolFileSize); if (bytesRead != poolFileSize) { fprintf(stderr, "unable to read the pool bundle file %s\n", poolFileName.data()); return 1; } /* * Swap the pool bundle so that a single checked-in file can be used. * The swapper functions also test that the data looks like * a well-formed .res file. */ ds = udata_openSwapperForInputData(poolBundle.fBytes, bytesRead, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &status); if (U_FAILURE(status)) { fprintf(stderr, "udata_openSwapperForInputData(pool bundle %s) failed: %s\n", poolFileName.data(), u_errorName(status)); return status; } ures_swap(ds, poolBundle.fBytes, bytesRead, poolBundle.fBytes, &status); udata_closeSwapper(ds); if (U_FAILURE(status)) { fprintf(stderr, "ures_swap(pool bundle %s) failed: %s\n", poolFileName.data(), u_errorName(status)); return status; } header = (const DataHeader *)poolBundle.fBytes; if (header->info.formatVersion[0] < 2) { fprintf(stderr, "invalid format of pool bundle file %s\n", poolFileName.data()); return U_INVALID_FORMAT_ERROR; } const int32_t *pRoot = (const int32_t *)( (const char *)header + header->dataHeader.headerSize); poolBundle.fIndexes = pRoot + 1; indexLength = poolBundle.fIndexes[URES_INDEX_LENGTH] & 0xff; if (indexLength <= URES_INDEX_POOL_CHECKSUM) { fprintf(stderr, "insufficient indexes[] in pool bundle file %s\n", poolFileName.data()); return U_INVALID_FORMAT_ERROR; } int32_t keysBottom = 1 + indexLength; int32_t keysTop = poolBundle.fIndexes[URES_INDEX_KEYS_TOP]; poolBundle.fKeys = (const char *)(pRoot + keysBottom); poolBundle.fKeysLength = (keysTop - keysBottom) * 4; poolBundle.fChecksum = poolBundle.fIndexes[URES_INDEX_POOL_CHECKSUM]; for (i = 0; i < poolBundle.fKeysLength; ++i) { if (poolBundle.fKeys[i] == 0) { ++poolBundle.fKeysCount; } } // 16BitUnits[] begins with strings-v2. // The strings-v2 may optionally be terminated by what looks like // an explicit string length that exceeds the number of remaining 16-bit units. int32_t stringUnitsLength = (poolBundle.fIndexes[URES_INDEX_16BIT_TOP] - keysTop) * 2; if (stringUnitsLength >= 2 && getFormatVersion() >= 3) { poolBundle.fStrings = new PseudoListResource(NULL, status); if (poolBundle.fStrings == NULL) { fprintf(stderr, "unable to allocate memory for the pool bundle strings %s\n", poolFileName.data()); return U_MEMORY_ALLOCATION_ERROR; } // The PseudoListResource constructor call did not allocate further memory. assert(U_SUCCESS(status)); const UChar *p = (const UChar *)(pRoot + keysTop); int32_t remaining = stringUnitsLength; do { int32_t first = *p; int8_t numCharsForLength; int32_t length; if (!U16_IS_TRAIL(first)) { // NUL-terminated numCharsForLength = 0; for (length = 0; length < remaining && p[length] != 0; ++length) {} } else if (first < 0xdfef) { numCharsForLength = 1; length = first & 0x3ff; } else if (first < 0xdfff && remaining >= 2) { numCharsForLength = 2; length = ((first - 0xdfef) << 16) | p[1]; } else if (first == 0xdfff && remaining >= 3) { numCharsForLength = 3; length = ((int32_t)p[1] << 16) | p[2]; } else { break; // overrun } // Check for overrun before changing remaining, // so that it is always accurate after the loop body. if ((numCharsForLength + length) >= remaining || p[numCharsForLength + length] != 0) { break; // overrun or explicitly terminated } int32_t poolStringIndex = stringUnitsLength - remaining; // Maximum pool string index when suffix-sharing the last character. int32_t maxStringIndex = poolStringIndex + numCharsForLength + length - 1; if (maxStringIndex >= RES_MAX_OFFSET) { // pool string index overrun break; } p += numCharsForLength; remaining -= numCharsForLength; if (length != 0) { StringResource *sr = new StringResource(poolStringIndex, numCharsForLength, p, length, status); if (sr == NULL) { fprintf(stderr, "unable to allocate memory for a pool bundle string %s\n", poolFileName.data()); return U_MEMORY_ALLOCATION_ERROR; } poolBundle.fStrings->add(sr); poolBundle.fStringIndexLimit = maxStringIndex + 1; // The StringResource constructor did not allocate further memory. assert(U_SUCCESS(status)); } p += length + 1; remaining -= length + 1; } while (remaining > 0); if (poolBundle.fStrings->fCount == 0) { delete poolBundle.fStrings; poolBundle.fStrings = NULL; } } T_FileStream_close(poolFile); setUsePoolBundle(TRUE); if (isVerbose() && poolBundle.fStrings != NULL) { printf("number of shared strings: %d\n", (int)poolBundle.fStrings->fCount); int32_t length = poolBundle.fStringIndexLimit + 1; // incl. last NUL printf("16-bit units for strings: %6d = %6d bytes\n", (int)length, (int)length * 2); } } if(!options[FORMAT_VERSION].doesOccur && getFormatVersion() == 3 && poolBundle.fStrings == NULL && !options[WRITE_POOL_BUNDLE].doesOccur) { // If we just default to formatVersion 3 // but there are no pool bundle strings to share // and we do not write a pool bundle, // then write formatVersion 2 which is just as good. setFormatVersion(2); } if(options[INCLUDE_UNIHAN_COLL].doesOccur) { puts("genrb option --includeUnihanColl ignored: \n" "CLDR 26/ICU 54 unihan data is small, except\n" "the ucadata-unihan.icu version of the collation root data\n" "is about 300kB larger than the ucadata-implicithan.icu version."); } if((argc-1)!=1) { printf("genrb number of files: %d\n", argc - 1); } /* generate the binary files */ for(i = 1; i < argc; ++i) { status = U_ZERO_ERROR; arg = getLongPathname(argv[i]); CharString theCurrentFileName; if (inputDir) { theCurrentFileName.append(inputDir, status); } theCurrentFileName.appendPathPart(arg, status); if (U_FAILURE(status)) { break; } gCurrentFileName = theCurrentFileName.data(); if (isVerbose()) { printf("Processing file \"%s\"\n", theCurrentFileName.data()); } processFile(arg, encoding, inputDir, outputDir, NULL, newPoolBundle.getAlias(), options[NO_BINARY_COLLATION].doesOccur, status); } poolBundle.close(); if(U_SUCCESS(status) && options[WRITE_POOL_BUNDLE].doesOccur) { char outputFileName[256]; newPoolBundle->write(outputDir, NULL, outputFileName, sizeof(outputFileName), status); if(U_FAILURE(status)) { fprintf(stderr, "unable to write the pool bundle: %s\n", u_errorName(status)); } } u_cleanup(); /* Dont return warnings as a failure */ if (U_SUCCESS(status)) { return 0; } return status; }
const MicroPropsGenerator* NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, UErrorCode& status) { if (U_FAILURE(status)) { return nullptr; } const MicroPropsGenerator* chain = &fMicros; // Check that macros is error-free before continuing. if (macros.copyErrorTo(status)) { return nullptr; } // TODO: Accept currency symbols from DecimalFormatSymbols? // Pre-compute a few values for efficiency. bool isCurrency = utils::unitIsCurrency(macros.unit); bool isNoUnit = utils::unitIsNoUnit(macros.unit); bool isPercent = isNoUnit && utils::unitIsPercent(macros.unit); bool isPermille = isNoUnit && utils::unitIsPermille(macros.unit); bool isCldrUnit = !isCurrency && !isNoUnit; bool isAccounting = macros.sign == UNUM_SIGN_ACCOUNTING || macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS || macros.sign == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO; CurrencyUnit currency(nullptr, status); if (isCurrency) { currency = CurrencyUnit(macros.unit, status); // Restore CurrencyUnit from MeasureUnit } const CurrencySymbols* currencySymbols; if (macros.currencySymbols != nullptr) { // Used by the DecimalFormat code path currencySymbols = macros.currencySymbols; } else { fWarehouse.fCurrencySymbols = {currency, macros.locale, status}; currencySymbols = &fWarehouse.fCurrencySymbols; } UNumberUnitWidth unitWidth = UNUM_UNIT_WIDTH_SHORT; if (macros.unitWidth != UNUM_UNIT_WIDTH_COUNT) { unitWidth = macros.unitWidth; } // Select the numbering system. LocalPointer<const NumberingSystem> nsLocal; const NumberingSystem* ns; if (macros.symbols.isNumberingSystem()) { ns = macros.symbols.getNumberingSystem(); } else { // TODO: Is there a way to avoid creating the NumberingSystem object? ns = NumberingSystem::createInstance(macros.locale, status); // Give ownership to the function scope. nsLocal.adoptInstead(ns); } const char* nsName = U_SUCCESS(status) ? ns->getName() : "latn"; // Resolve the symbols. Do this here because currency may need to customize them. if (macros.symbols.isDecimalFormatSymbols()) { fMicros.symbols = macros.symbols.getDecimalFormatSymbols(); } else { fMicros.symbols = new DecimalFormatSymbols(macros.locale, *ns, status); // Give ownership to the NumberFormatterImpl. fSymbols.adoptInstead(fMicros.symbols); } // Load and parse the pattern string. It is used for grouping sizes and affixes only. // If we are formatting currency, check for a currency-specific pattern. const char16_t* pattern = nullptr; if (isCurrency) { CurrencyFormatInfoResult info = getCurrencyFormatInfo( macros.locale, currency.getSubtype(), status); if (info.exists) { pattern = info.pattern; // It's clunky to clone an object here, but this code is not frequently executed. auto* symbols = new DecimalFormatSymbols(*fMicros.symbols); fMicros.symbols = symbols; fSymbols.adoptInstead(symbols); symbols->setSymbol( DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol, UnicodeString(info.decimalSeparator), FALSE); symbols->setSymbol( DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol, UnicodeString(info.groupingSeparator), FALSE); } } if (pattern == nullptr) { CldrPatternStyle patternStyle; if (isPercent || isPermille) { patternStyle = CLDR_PATTERN_STYLE_PERCENT; } else if (!isCurrency || unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) { patternStyle = CLDR_PATTERN_STYLE_DECIMAL; } else if (isAccounting) { // NOTE: Although ACCOUNTING and ACCOUNTING_ALWAYS are only supported in currencies right now, // the API contract allows us to add support to other units in the future. patternStyle = CLDR_PATTERN_STYLE_ACCOUNTING; } else { patternStyle = CLDR_PATTERN_STYLE_CURRENCY; } pattern = utils::getPatternForStyle(macros.locale, nsName, patternStyle, status); } auto patternInfo = new ParsedPatternInfo(); fPatternInfo.adoptInstead(patternInfo); PatternParser::parseToPatternInfo(UnicodeString(pattern), *patternInfo, status); ///////////////////////////////////////////////////////////////////////////////////// /// START POPULATING THE DEFAULT MICROPROPS AND BUILDING THE MICROPROPS GENERATOR /// ///////////////////////////////////////////////////////////////////////////////////// // Multiplier if (macros.scale.isValid()) { fMicros.helpers.multiplier.setAndChain(macros.scale, chain); chain = &fMicros.helpers.multiplier; } // Rounding strategy Precision precision; if (!macros.precision.isBogus()) { precision = macros.precision; } else if (macros.notation.fType == Notation::NTN_COMPACT) { precision = Precision::integer().withMinDigits(2); } else if (isCurrency) { precision = Precision::currency(UCURR_USAGE_STANDARD); } else { precision = Precision::maxFraction(6); } UNumberFormatRoundingMode roundingMode; if (macros.roundingMode != kDefaultMode) { roundingMode = macros.roundingMode; } else { // Temporary until ICU 64 roundingMode = precision.fRoundingMode; } fMicros.rounder = {precision, roundingMode, currency, status}; // Grouping strategy if (!macros.grouper.isBogus()) { fMicros.grouping = macros.grouper; } else if (macros.notation.fType == Notation::NTN_COMPACT) { // Compact notation uses minGrouping by default since ICU 59 fMicros.grouping = Grouper::forStrategy(UNUM_GROUPING_MIN2); } else { fMicros.grouping = Grouper::forStrategy(UNUM_GROUPING_AUTO); } fMicros.grouping.setLocaleData(*fPatternInfo, macros.locale); // Padding strategy if (!macros.padder.isBogus()) { fMicros.padding = macros.padder; } else { fMicros.padding = Padder::none(); } // Integer width if (!macros.integerWidth.isBogus()) { fMicros.integerWidth = macros.integerWidth; } else { fMicros.integerWidth = IntegerWidth::standard(); } // Sign display if (macros.sign != UNUM_SIGN_COUNT) { fMicros.sign = macros.sign; } else { fMicros.sign = UNUM_SIGN_AUTO; } // Decimal mark display if (macros.decimal != UNUM_DECIMAL_SEPARATOR_COUNT) { fMicros.decimal = macros.decimal; } else { fMicros.decimal = UNUM_DECIMAL_SEPARATOR_AUTO; } // Use monetary separator symbols fMicros.useCurrency = isCurrency; // Inner modifier (scientific notation) if (macros.notation.fType == Notation::NTN_SCIENTIFIC) { fScientificHandler.adoptInstead(new ScientificHandler(¯os.notation, fMicros.symbols, chain)); chain = fScientificHandler.getAlias(); } else { // No inner modifier required fMicros.modInner = &fMicros.helpers.emptyStrongModifier; } // Middle modifier (patterns, positive/negative, currency symbols, percent) auto patternModifier = new MutablePatternModifier(false); fPatternModifier.adoptInstead(patternModifier); patternModifier->setPatternInfo( macros.affixProvider != nullptr ? macros.affixProvider : static_cast<const AffixPatternProvider*>(fPatternInfo.getAlias())); patternModifier->setPatternAttributes(fMicros.sign, isPermille); if (patternModifier->needsPlurals()) { patternModifier->setSymbols( fMicros.symbols, currencySymbols, unitWidth, resolvePluralRules(macros.rules, macros.locale, status)); } else { patternModifier->setSymbols(fMicros.symbols, currencySymbols, unitWidth, nullptr); } if (safe) { fImmutablePatternModifier.adoptInstead(patternModifier->createImmutableAndChain(chain, status)); chain = fImmutablePatternModifier.getAlias(); } else { patternModifier->addToChain(chain); chain = patternModifier; } // Outer modifier (CLDR units and currency long names) if (isCldrUnit) { fLongNameHandler.adoptInstead( new LongNameHandler( LongNameHandler::forMeasureUnit( macros.locale, macros.unit, macros.perUnit, unitWidth, resolvePluralRules(macros.rules, macros.locale, status), chain, status))); chain = fLongNameHandler.getAlias(); } else if (isCurrency && unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) { fLongNameHandler.adoptInstead( new LongNameHandler( LongNameHandler::forCurrencyLongNames( macros.locale, currency, resolvePluralRules(macros.rules, macros.locale, status), chain, status))); chain = fLongNameHandler.getAlias(); } else { // No outer modifier required fMicros.modOuter = &fMicros.helpers.emptyWeakModifier; } // Compact notation // NOTE: Compact notation can (but might not) override the middle modifier and rounding. // It therefore needs to go at the end of the chain. if (macros.notation.fType == Notation::NTN_COMPACT) { CompactType compactType = (isCurrency && unitWidth != UNUM_UNIT_WIDTH_FULL_NAME) ? CompactType::TYPE_CURRENCY : CompactType::TYPE_DECIMAL; fCompactHandler.adoptInstead( new CompactHandler( macros.notation.fUnion.compactStyle, macros.locale, nsName, compactType, resolvePluralRules(macros.rules, macros.locale, status), safe ? patternModifier : nullptr, chain, status)); chain = fCompactHandler.getAlias(); } return chain; }
virtual int32_t following(int32_t offset) { return fDelegate->following(offset); }
virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); }
void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const { const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode); if (U_FAILURE(errorCode)) { return; } const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0); const UnicodeString &overflowBoundary = *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1); // We make a sorted array of elements. // Some of the input may be redundant. // That is, we might have c, ch, d, where "ch" sorts just like "c", "h". // We filter out those cases. UnicodeSetIterator iter(*initialLabels_); while (iter.next()) { const UnicodeString *item = &iter.getString(); LocalPointer<UnicodeString> ownedItem; UBool checkDistinct; int32_t itemLength = item->length(); if (!item->hasMoreChar32Than(0, itemLength, 1)) { checkDistinct = FALSE; } else if(item->charAt(itemLength - 1) == 0x2a && // '*' item->charAt(itemLength - 2) != 0x2a) { // Use a label if it is marked with one trailing star, // even if the label string sorts the same when all contractions are suppressed. ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1)); item = ownedItem.getAlias(); if (item == NULL) { errorCode = U_MEMORY_ALLOCATION_ERROR; return; } checkDistinct = FALSE; } else { checkDistinct = TRUE; } if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) { // Ignore a primary-ignorable or non-alphabetic index character. } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) { // Ignore an index character that will land in the overflow bucket. } else if (checkDistinct && collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) { // Ignore a multi-code point index character that does not sort distinctly // from the sequence of its separate characters. } else { int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_); if (insertionPoint < 0) { indexCharacters.insertElementAt( ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode); } else { const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint); if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) { indexCharacters.setElementAt( ownedString(*item, ownedItem, errorCode), insertionPoint); } } } } if (U_FAILURE(errorCode)) { return; } // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element int32_t size = indexCharacters.size() - 1; if (size > maxLabelCount_) { int32_t count = 0; int32_t old = -1; for (int32_t i = 0; i < indexCharacters.size();) { ++count; int32_t bump = count * maxLabelCount_ / size; if (bump == old) { indexCharacters.removeElementAt(i); } else { old = bump; ++i; } } } }
virtual int32_t last(void) { return fDelegate->last(); }
void StringCaseTest::TestCasing() { UErrorCode status = U_ZERO_ERROR; #if !UCONFIG_NO_BREAK_ITERATION LocalUBreakIteratorPointer iter; #endif char cLocaleID[100]; UnicodeString locale, input, output, optionsString, result; uint32_t options; int32_t whichCase, type; LocalPointer<TestDataModule> driver(TestDataModule::getTestDataModule("casing", *this, status)); if(U_SUCCESS(status)) { for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) { #if UCONFIG_NO_BREAK_ITERATION if(whichCase==TEST_TITLE) { continue; } #endif LocalPointer<TestData> casingTest(driver->createTestData(dataNames[whichCase], status)); if(U_FAILURE(status)) { errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status)); break; } const DataMap *myCase = NULL; while(casingTest->nextCase(myCase, status)) { input = myCase->getString("Input", status); output = myCase->getString("Output", status); if(whichCase!=TEST_FOLD) { locale = myCase->getString("Locale", status); } locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), ""); #if !UCONFIG_NO_BREAK_ITERATION if(whichCase==TEST_TITLE) { type = myCase->getInt("Type", status); if(type>=0) { iter.adoptInstead(ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status)); } else if(type==-2) { // Open a trivial break iterator that only delivers { 0, length } // or even just { 0 } as boundaries. static const UChar rules[] = { 0x2e, 0x2a, 0x3b }; // ".*;" UParseError parseError; iter.adoptInstead(ubrk_openRules(rules, LENGTHOF(rules), NULL, 0, &parseError, &status)); } } #endif options = 0; if(whichCase==TEST_TITLE || whichCase==TEST_FOLD) { optionsString = myCase->getString("Options", status); if(optionsString.indexOf((UChar)0x54)>=0) { // T options|=U_FOLD_CASE_EXCLUDE_SPECIAL_I; } if(optionsString.indexOf((UChar)0x4c)>=0) { // L options|=U_TITLECASE_NO_LOWERCASE; } if(optionsString.indexOf((UChar)0x41)>=0) { // A options|=U_TITLECASE_NO_BREAK_ADJUSTMENT; } } if(U_FAILURE(status)) { dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase], u_errorName(status)); status = U_ZERO_ERROR; } else { #if UCONFIG_NO_BREAK_ITERATION LocalPointer<UMemory> iter; #endif TestCasingImpl(input, output, whichCase, iter.getAlias(), cLocaleID, options); } #if !UCONFIG_NO_BREAK_ITERATION iter.adoptInstead(NULL); #endif } } } #if !UCONFIG_NO_BREAK_ITERATION // more tests for API coverage status=U_ZERO_ERROR; input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape(); (result=input).toTitle(NULL); if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) { dataerrln("UnicodeString::toTitle(NULL) failed."); } #endif }
BreakIterator * SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UErrorCode& status) { LocalPointer<BreakIterator> adopt(adoptBreakIterator); if(U_FAILURE(status)) { return NULL; } LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status)); LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status)); int32_t revCount = 0; int32_t fwdCount = 0; int32_t subCount = fSet.size(); LocalArray<UnicodeString> ustrs(new UnicodeString[subCount]); LocalArray<int> partials(new int[subCount]); LocalPointer<UCharsTrie> backwardsTrie; // i.e. ".srM" for Mrs. LocalPointer<UCharsTrie> forwardsPartialTrie; // Has ".a" for "a.M." int n=0; for ( set<UnicodeString>::iterator i = fSet.begin(); i != fSet.end(); i++) { const UnicodeString &abbr = *i; ustrs[n] = abbr; partials[n] = 0; // default: not partial n++; } // first pass - find partials. for(int i=0;i<subCount;i++) { int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations if(nn>-1 && (nn+1)!=ustrs[i].length()) { //if(true) u_printf("Is a partial: /%S/\n", ustrs[i].getTerminatedBuffer()); // is partial. // is it unique? int sameAs = -1; for(int j=0;j<subCount;j++) { if(j==i) continue; if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) { //if(true) u_printf("Prefix match: /%S/ to %d\n", ustrs[j].getTerminatedBuffer(), nn+1); //UBool otherIsPartial = ((nn+1)!=ustrs[j].length()); // true if ustrs[j] doesn't end at nn if(partials[j]==0) { // hasn't been processed yet partials[j] = kSuppressInReverse | kAddToForward; //if(true) u_printf("Suppressing: /%S/\n", ustrs[j].getTerminatedBuffer()); } else if(partials[j] & kSuppressInReverse) { sameAs = j; // the other entry is already in the reverse table. } } } //if(debug2) u_printf("for partial /%S/ same=%d partials=%d\n", ustrs[i].getTerminatedBuffer(), sameAs, partials[i]); UnicodeString prefix(ustrs[i], 0, nn+1); if(sameAs == -1 && partials[i] == 0) { // first one - add the prefix to the reverse table. prefix.reverse(); builder->add(prefix, kPARTIAL, status); revCount++; //if(debug2) u_printf("Added Partial: /%S/ from /%S/ status=%s\n", prefix.getTerminatedBuffer(), ustrs[i].getTerminatedBuffer(), u_errorName(status)); partials[i] = kSuppressInReverse | kAddToForward; } else { //if(debug2) u_printf(" // not adding partial for /%S/ from /%S/\n", prefix.getTerminatedBuffer(), ustrs[i].getTerminatedBuffer()); } } } for(int i=0;i<subCount;i++) { if(partials[i]==0) { ustrs[i].reverse(); builder->add(ustrs[i], kMATCH, status); revCount++; //if(debug2) u_printf("Added: /%S/ status=%s\n", ustrs[i].getTerminatedBuffer(), u_errorName(status)); } else { //if(debug2) u_printf(" Adding fwd: /%S/\n", ustrs[i].getTerminatedBuffer()); // an optimization would be to only add the portion after the '.' // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward, // instead of "Ph.D." since we already know the "Ph." part is a match. // would need the trie to be able to hold 0-length strings, though. builder2->add(ustrs[i], kMATCH, status); // forward fwdCount++; //ustrs[i].reverse(); ////if(debug2) u_printf("SUPPRESS- not Added(%d): /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status)); } } //if(debug) u_printf(" %s has %d abbrs.\n", fJSONSource.c_str(), subCount); if(revCount>0) { backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status)); if(U_FAILURE(status)) { //printf("Error %s building backwards\n", u_errorName(status)); return NULL; } } if(fwdCount>0) { forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status)); if(U_FAILURE(status)) { //printf("Error %s building forwards\n", u_errorName(status)); return NULL; } } return new ULISentenceBreakIterator(adopt.orphan(), forwardsPartialTrie.orphan(), backwardsTrie.orphan(), status); }