/* * Initializes the region data from the ICU resource bundles. The region data * contains the basic relationships such as which regions are known, what the numeric * codes are, any known aliases, and the territory containment data. * * If the region data has already loaded, then this method simply returns without doing * anything meaningful. */ void Region::loadRegionData(UErrorCode &status) { // Construct service objs first LocalUHashtablePointer newRegionIDMap(uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status)); LocalUHashtablePointer newNumericCodeMap(uhash_open(uhash_hashLong,uhash_compareLong,NULL,&status)); LocalUHashtablePointer newRegionAliases(uhash_open(uhash_hashUnicodeString,uhash_compareUnicodeString,NULL,&status)); LocalPointer<DecimalFormat> df(new DecimalFormat(status), status); LocalPointer<UVector> continents(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); LocalPointer<UVector> groupings(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); allRegions = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); LocalUResourceBundlePointer metadata(ures_openDirect(NULL,"metadata",&status)); LocalUResourceBundlePointer metadataAlias(ures_getByKey(metadata.getAlias(),"alias",NULL,&status)); LocalUResourceBundlePointer territoryAlias(ures_getByKey(metadataAlias.getAlias(),"territory",NULL,&status)); LocalUResourceBundlePointer supplementalData(ures_openDirect(NULL,"supplementalData",&status)); LocalUResourceBundlePointer codeMappings(ures_getByKey(supplementalData.getAlias(),"codeMappings",NULL,&status)); LocalUResourceBundlePointer idValidity(ures_getByKey(supplementalData.getAlias(),"idValidity",NULL,&status)); LocalUResourceBundlePointer regionList(ures_getByKey(idValidity.getAlias(),"region",NULL,&status)); LocalUResourceBundlePointer regionRegular(ures_getByKey(regionList.getAlias(),"regular",NULL,&status)); LocalUResourceBundlePointer 
regionMacro(ures_getByKey(regionList.getAlias(),"macroregion",NULL,&status)); LocalUResourceBundlePointer regionUnknown(ures_getByKey(regionList.getAlias(),"unknown",NULL,&status)); LocalUResourceBundlePointer territoryContainment(ures_getByKey(supplementalData.getAlias(),"territoryContainment",NULL,&status)); LocalUResourceBundlePointer worldContainment(ures_getByKey(territoryContainment.getAlias(),"001",NULL,&status)); LocalUResourceBundlePointer groupingContainment(ures_getByKey(territoryContainment.getAlias(),"grouping",NULL,&status)); if (U_FAILURE(status)) { return; } // now, initialize df->setParseIntegerOnly(TRUE); uhash_setValueDeleter(newRegionIDMap.getAlias(), deleteRegion); // regionIDMap owns objs uhash_setKeyDeleter(newRegionAliases.getAlias(), uprv_deleteUObject); // regionAliases owns the string keys while ( ures_hasNext(regionRegular.getAlias()) ) { UnicodeString regionName = ures_getNextUnicodeString(regionRegular.getAlias(),NULL,&status); int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER); UChar buf[6]; regionName.extract(buf,6,status); if ( rangeMarkerLocation > 0 ) { UChar endRange = regionName.charAt(rangeMarkerLocation+1); buf[rangeMarkerLocation] = 0; while ( buf[rangeMarkerLocation-1] <= endRange ) { LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status); allRegions->addElement(newRegion.orphan(),status); buf[rangeMarkerLocation-1]++; } } else { LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status); allRegions->addElement(newRegion.orphan(),status); } } while ( ures_hasNext(regionMacro.getAlias()) ) { UnicodeString regionName = ures_getNextUnicodeString(regionMacro.getAlias(),NULL,&status); int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER); UChar buf[6]; regionName.extract(buf,6,status); if ( rangeMarkerLocation > 0 ) { UChar endRange = regionName.charAt(rangeMarkerLocation+1); buf[rangeMarkerLocation] = 0; while ( buf[rangeMarkerLocation-1] <= endRange ) { 
LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status); allRegions->addElement(newRegion.orphan(),status); buf[rangeMarkerLocation-1]++; } } else { LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status); allRegions->addElement(newRegion.orphan(),status); } } while ( ures_hasNext(regionUnknown.getAlias()) ) { LocalPointer<UnicodeString> regionName (new UnicodeString(ures_getNextUnicodeString(regionUnknown.getAlias(),NULL,&status),status)); allRegions->addElement(regionName.orphan(),status); } while ( ures_hasNext(worldContainment.getAlias()) ) { UnicodeString *continentName = new UnicodeString(ures_getNextUnicodeString(worldContainment.getAlias(),NULL,&status)); continents->addElement(continentName,status); } while ( ures_hasNext(groupingContainment.getAlias()) ) { UnicodeString *groupingName = new UnicodeString(ures_getNextUnicodeString(groupingContainment.getAlias(),NULL,&status)); groupings->addElement(groupingName,status); } for ( int32_t i = 0 ; i < allRegions->size() ; i++ ) { LocalPointer<Region> r(new Region(), status); if ( U_FAILURE(status) ) { return; } UnicodeString *regionName = (UnicodeString *)allRegions->elementAt(i); r->idStr = *regionName; r->idStr.extract(0,r->idStr.length(),r->id,sizeof(r->id),US_INV); r->type = URGN_TERRITORY; // Only temporary - figure out the real type later once the aliases are known. Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(r->idStr,result,ps); if ( U_SUCCESS(ps) ) { r->code = result.getLong(); // Convert string to number uhash_iput(newNumericCodeMap.getAlias(),r->code,(void *)(r.getAlias()),&status); r->type = URGN_SUBCONTINENT; } else { r->code = -1; } void* idStrAlias = (void*)&(r->idStr); // about to orphan 'r'. Save this off. 
uhash_put(newRegionIDMap.getAlias(),idStrAlias,(void *)(r.orphan()),&status); // regionIDMap takes ownership } // Process the territory aliases while ( ures_hasNext(territoryAlias.getAlias()) ) { LocalUResourceBundlePointer res(ures_getNextResource(territoryAlias.getAlias(),NULL,&status)); const char *aliasFrom = ures_getKey(res.getAlias()); LocalPointer<UnicodeString> aliasFromStr(new UnicodeString(aliasFrom, -1, US_INV), status); UnicodeString aliasTo = ures_getUnicodeStringByKey(res.getAlias(),"replacement",&status); res.adoptInstead(NULL); const Region *aliasToRegion = (Region *) uhash_get(newRegionIDMap.getAlias(),&aliasTo); Region *aliasFromRegion = (Region *)uhash_get(newRegionIDMap.getAlias(),aliasFromStr.getAlias()); if ( aliasToRegion != NULL && aliasFromRegion == NULL ) { // This is just an alias from some string to a region uhash_put(newRegionAliases.getAlias(),(void *)aliasFromStr.orphan(), (void *)aliasToRegion,&status); } else { if ( aliasFromRegion == NULL ) { // Deprecated region code not in the master codes list - so need to create a deprecated region for it. 
LocalPointer<Region> newRgn(new Region, status); if ( U_SUCCESS(status) ) { aliasFromRegion = newRgn.orphan(); } else { return; // error out } aliasFromRegion->idStr.setTo(*aliasFromStr); aliasFromRegion->idStr.extract(0,aliasFromRegion->idStr.length(),aliasFromRegion->id,sizeof(aliasFromRegion->id),US_INV); uhash_put(newRegionIDMap.getAlias(),(void *)&(aliasFromRegion->idStr),(void *)aliasFromRegion,&status); Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(aliasFromRegion->idStr,result,ps); if ( U_SUCCESS(ps) ) { aliasFromRegion->code = result.getLong(); // Convert string to number uhash_iput(newNumericCodeMap.getAlias(),aliasFromRegion->code,(void *)aliasFromRegion,&status); } else { aliasFromRegion->code = -1; } aliasFromRegion->type = URGN_DEPRECATED; } else { aliasFromRegion->type = URGN_DEPRECATED; } { LocalPointer<UVector> newPreferredValues(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); aliasFromRegion->preferredValues = newPreferredValues.orphan(); } if( U_FAILURE(status)) { return; } UnicodeString currentRegion; //currentRegion.remove(); TODO: was already 0 length? for (int32_t i = 0 ; i < aliasTo.length() ; i++ ) { if ( aliasTo.charAt(i) != 0x0020 ) { currentRegion.append(aliasTo.charAt(i)); } if ( aliasTo.charAt(i) == 0x0020 || i+1 == aliasTo.length() ) { Region *target = (Region *)uhash_get(newRegionIDMap.getAlias(),(void *)¤tRegion); if (target) { LocalPointer<UnicodeString> preferredValue(new UnicodeString(target->idStr), status); aliasFromRegion->preferredValues->addElement((void *)preferredValue.orphan(),status); // may add null if err } currentRegion.remove(); } } } } // Process the code mappings - This will allow us to assign numeric codes to most of the territories. 
while ( ures_hasNext(codeMappings.getAlias()) ) { UResourceBundle *mapping = ures_getNextResource(codeMappings.getAlias(),NULL,&status); if ( ures_getType(mapping) == URES_ARRAY && ures_getSize(mapping) == 3) { UnicodeString codeMappingID = ures_getUnicodeStringByIndex(mapping,0,&status); UnicodeString codeMappingNumber = ures_getUnicodeStringByIndex(mapping,1,&status); UnicodeString codeMapping3Letter = ures_getUnicodeStringByIndex(mapping,2,&status); Region *r = (Region *)uhash_get(newRegionIDMap.getAlias(),(void *)&codeMappingID); if ( r ) { Formattable result; UErrorCode ps = U_ZERO_ERROR; df->parse(codeMappingNumber,result,ps); if ( U_SUCCESS(ps) ) { r->code = result.getLong(); // Convert string to number uhash_iput(newNumericCodeMap.getAlias(),r->code,(void *)r,&status); } LocalPointer<UnicodeString> code3(new UnicodeString(codeMapping3Letter), status); uhash_put(newRegionAliases.getAlias(),(void *)code3.orphan(), (void *)r,&status); } } ures_close(mapping); } // Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS Region *r; UnicodeString WORLD_ID_STRING(WORLD_ID); r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&WORLD_ID_STRING); if ( r ) { r->type = URGN_WORLD; } UnicodeString UNKNOWN_REGION_ID_STRING(UNKNOWN_REGION_ID); r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&UNKNOWN_REGION_ID_STRING); if ( r ) { r->type = URGN_UNKNOWN; } for ( int32_t i = 0 ; i < continents->size() ; i++ ) { r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)continents->elementAt(i)); if ( r ) { r->type = URGN_CONTINENT; } } for ( int32_t i = 0 ; i < groupings->size() ; i++ ) { r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)groupings->elementAt(i)); if ( r ) { r->type = URGN_GROUPING; } } // Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR // even though it looks like a territory code. Need to handle it here. 
UnicodeString OUTLYING_OCEANIA_REGION_ID_STRING(OUTLYING_OCEANIA_REGION_ID); r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&OUTLYING_OCEANIA_REGION_ID_STRING); if ( r ) { r->type = URGN_SUBCONTINENT; } // Load territory containment info from the supplemental data. while ( ures_hasNext(territoryContainment.getAlias()) ) { LocalUResourceBundlePointer mapping(ures_getNextResource(territoryContainment.getAlias(),NULL,&status)); if( U_FAILURE(status) ) { return; // error out } const char *parent = ures_getKey(mapping.getAlias()); if (uprv_strcmp(parent, "containedGroupings") == 0 || uprv_strcmp(parent, "deprecated") == 0) { continue; // handle new pseudo-parent types added in ICU data per cldrbug 7808; for now just skip. // #11232 is to do something useful with these. } UnicodeString parentStr = UnicodeString(parent, -1 , US_INV); Region *parentRegion = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&parentStr); for ( int j = 0 ; j < ures_getSize(mapping.getAlias()); j++ ) { UnicodeString child = ures_getUnicodeStringByIndex(mapping.getAlias(),j,&status); Region *childRegion = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&child); if ( parentRegion != NULL && childRegion != NULL ) { // Add the child region to the set of regions contained by the parent if (parentRegion->containedRegions == NULL) { parentRegion->containedRegions = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); } LocalPointer<UnicodeString> childStr(new UnicodeString(), status); if( U_FAILURE(status) ) { return; // error out } childStr->fastCopyFrom(childRegion->idStr); parentRegion->containedRegions->addElement((void *)childStr.orphan(),status); // Set the parent region to be the containing region of the child. // Regions of type GROUPING can't be set as the parent, since another region // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent. 
if ( parentRegion->type != URGN_GROUPING) { childRegion->containingRegion = parentRegion; } } } } // Create the availableRegions lists int32_t pos = UHASH_FIRST; while ( const UHashElement* element = uhash_nextElement(newRegionIDMap.getAlias(),&pos)) { Region *ar = (Region *)element->value.pointer; if ( availableRegions[ar->type] == NULL ) { LocalPointer<UVector> newAr(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); availableRegions[ar->type] = newAr.orphan(); } LocalPointer<UnicodeString> arString(new UnicodeString(ar->idStr), status); if( U_FAILURE(status) ) { return; // error out } availableRegions[ar->type]->addElement((void *)arString.orphan(),status); } ucln_i18n_registerCleanup(UCLN_I18N_REGION, region_cleanup); // copy hashtables numericCodeMap = newNumericCodeMap.orphan(); regionIDMap = newRegionIDMap.orphan(); regionAliases = newRegionAliases.orphan(); }
/*
 * Creates a NumberFormat of the given style for the given locale.
 *
 * @param desiredLocale       locale whose symbols and patterns are loaded
 * @param style               requested style; values outside
 *                            [0, UNUM_FORMAT_STYLE_COUNT) yield U_ILLEGAL_ARGUMENT_ERROR and
 *                            styles rejected by isStyleSupported() yield U_UNSUPPORTED_ERROR
 * @param mustBeDecimalFormat if true, the host-specific (Win32) formatter is never used and
 *                            an algorithmic numbering system yields U_UNSUPPORTED_ERROR
 * @param status              ICU error code; nothing is done if it already holds a failure.
 *                            It is set to U_USING_FALLBACK_WARNING when the locale's resource
 *                            bundle cannot be opened and last-resort data is used instead.
 * @return a newly allocated formatter, or NULL if status holds a failure
 */
NumberFormat*
NumberFormat::makeInstance(const Locale& desiredLocale,
                           UNumberFormatStyle style,
                           UBool mustBeDecimalFormat,
                           UErrorCode& status) {
    if (U_FAILURE(status)) return NULL;

    if (style < 0 || style >= UNUM_FORMAT_STYLE_COUNT) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    // Some styles are not supported. This is a result of merging
    // the @draft ICU 4.2 NumberFormat::EStyles into the long-existing UNumberFormatStyle.
    // Ticket #8503 is for reviewing/fixing/merging the two relevant implementations:
    // this one and unum_open().
    // The UNUM_PATTERN_ styles are not supported here
    // because this method does not take a pattern string.
    if (!isStyleSupported(style)) {
        status = U_UNSUPPORTED_ERROR;
        return NULL;
    }

#if U_PLATFORM_USES_ONLY_WIN32_API
    if (!mustBeDecimalFormat) {
        char buffer[8];
        int32_t count = desiredLocale.getKeywordValue("compat", buffer, sizeof(buffer), status);

        // if the locale has "@compat=host", create a host-specific NumberFormat
        if (U_SUCCESS(status) && count > 0 && uprv_strcmp(buffer, "host") == 0) {
            Win32NumberFormat *f = NULL;
            UBool curr = TRUE;

            switch (style) {
            case UNUM_DECIMAL:
                curr = FALSE;
                // fall-through

            case UNUM_CURRENCY:
            case UNUM_CURRENCY_ISO: // do not support plural formatting here
            case UNUM_CURRENCY_PLURAL:
                f = new Win32NumberFormat(desiredLocale, curr, status);

                if (U_SUCCESS(status)) {
                    return f;
                }

                delete f;
                break;
            default:
                break;
            }
        }
    }
#endif
    // Use numbering system cache hashtable
    umtx_initOnce(gNSCacheInitOnce, &nscacheInit);

    // Get cached numbering system
    LocalPointer<NumberingSystem> ownedNs;
    NumberingSystem *ns = NULL;
    if (NumberingSystem_cache != NULL) {
        // TODO: Bad hash key usage, see ticket #8504.
        int32_t hashKey = desiredLocale.hashCode();

        Mutex lock(&nscacheMutex);
        ns = (NumberingSystem *)uhash_iget(NumberingSystem_cache, hashKey);
        if (ns == NULL) {
            ns = NumberingSystem::createInstance(desiredLocale,status);
            uhash_iput(NumberingSystem_cache, hashKey, (void*)ns, &status);
        }
    } else {
        // No cache available: this call owns the numbering system and releases it on return.
        ownedNs.adoptInstead(NumberingSystem::createInstance(desiredLocale,status));
        ns = ownedNs.getAlias();
    }

    // check results of getting a numbering system
    if (U_FAILURE(status)) {
        return NULL;
    }

    if (mustBeDecimalFormat && ns->isAlgorithmic()) {
        status = U_UNSUPPORTED_ERROR;
        return NULL;
    }

    LocalPointer<DecimalFormatSymbols> symbolsToAdopt;
    UnicodeString pattern;
    LocalUResourceBundlePointer ownedResource(ures_open(NULL, desiredLocale.getName(), &status));
    if (U_FAILURE(status)) {
        // We don't appear to have resource data available -- use the last-resort data
        status = U_USING_FALLBACK_WARNING;
        // When the data is unavailable, and locale isn't passed in, last resort data is used.
        symbolsToAdopt.adoptInstead(new DecimalFormatSymbols(status));
        if (symbolsToAdopt.isNull()) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }

        // Creates a DecimalFormat instance with the last resort number patterns.
        pattern.setTo(TRUE, gLastResortNumberPatterns[style], -1);
    }
    else {
        // Loads the decimal symbols of the desired locale.
        symbolsToAdopt.adoptInstead(new DecimalFormatSymbols(desiredLocale, status));
        if (symbolsToAdopt.isNull()) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }

        // The bundle is temporarily taken out of ownedResource because it is reused as the
        // fill-in of the lookups below; it is handed back right after them.
        UResourceBundle *resource = ownedResource.orphan();
        LocalUResourceBundlePointer numElements(ures_getByKeyWithFallback(resource, gNumberElements, NULL, &status));
        resource = ures_getByKeyWithFallback(numElements.getAlias(), ns->getName(), resource, &status);
        resource = ures_getByKeyWithFallback(resource, gPatterns, resource, &status);
        ownedResource.adoptInstead(resource);

        int32_t patLen = 0;
        const UChar *patResStr = ures_getStringByKeyWithFallback(resource, gFormatKeys[style], &patLen, &status);

        // Didn't find a pattern specific to the numbering system, so fall back to "latn"
        if ( status == U_MISSING_RESOURCE_ERROR && uprv_strcmp(gLatn,ns->getName())) {
            status = U_ZERO_ERROR;
            resource = ures_getByKeyWithFallback(numElements.getAlias(), gLatn, resource, &status);
            resource = ures_getByKeyWithFallback(resource, gPatterns, resource, &status);
            patResStr = ures_getStringByKeyWithFallback(resource, gFormatKeys[style], &patLen, &status);
        }

        // Creates the specified decimal format style of the desired locale.
        pattern.setTo(TRUE, patResStr, patLen);
    }
    if (U_FAILURE(status)) {
        return NULL;
    }
    if(style==UNUM_CURRENCY || style == UNUM_CURRENCY_ISO){
        const UChar* currPattern = symbolsToAdopt->getCurrencyPattern();
        if(currPattern!=NULL){
            pattern.setTo(currPattern, u_strlen(currPattern));
        }
    }


    NumberFormat *f;
    if (ns->isAlgorithmic()) {
        UnicodeString nsDesc;
        UnicodeString nsRuleSetGroup;
        UnicodeString nsRuleSetName;
        Locale nsLoc;
        URBNFRuleSetTag desiredRulesType = URBNF_NUMBERING_SYSTEM;

        // The description is either "<locale>/<rule set group>/<rule set name>" or just a
        // rule set name to be looked up in desiredLocale.
        nsDesc.setTo(ns->getDescription());
        int32_t firstSlash = nsDesc.indexOf(gSlash);
        int32_t lastSlash = nsDesc.lastIndexOf(gSlash);
        if ( lastSlash > firstSlash ) {
            CharString nsLocID;

            nsLocID.appendInvariantChars(nsDesc.tempSubString(0, firstSlash), status);
            nsRuleSetGroup.setTo(nsDesc,firstSlash+1,lastSlash-firstSlash-1);
            nsRuleSetName.setTo(nsDesc,lastSlash+1);

            nsLoc = Locale::createFromName(nsLocID.data());

            UnicodeString SpelloutRules = UNICODE_STRING_SIMPLE("SpelloutRules");
            if ( nsRuleSetGroup.compare(SpelloutRules) == 0 ) {
                desiredRulesType = URBNF_SPELLOUT;
            }
        } else {
            nsLoc = desiredLocale;
            nsRuleSetName.setTo(nsDesc);
        }

        RuleBasedNumberFormat *r = new RuleBasedNumberFormat(desiredRulesType,nsLoc,status);
        if (r == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        r->setDefaultRuleSet(nsRuleSetName,status);
        f = r;
    } else {
        // replace single currency sign in the pattern with double currency sign
        // if the style is UNUM_CURRENCY_ISO
        if (style == UNUM_CURRENCY_ISO) {
            pattern.findAndReplace(UnicodeString(TRUE, gSingleCurrencySign, 1),
                                   UnicodeString(TRUE, gDoubleCurrencySign, 2));
        }

        // "new DecimalFormat()" does not adopt the symbols if its memory allocation fails.
        DecimalFormatSymbols *syms = symbolsToAdopt.orphan();
        f = new DecimalFormat(pattern, syms, style, status);
        if (f == NULL) {
            delete syms;
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
    }

    // ownedResource is NULL when the last-resort data is in use. ures_getLocaleByType() must
    // not be handed a NULL bundle (the ICU implementation reports U_ILLEGAL_ARGUMENT_ERROR
    // for it), which would turn the intended fallback into a hard failure. The locale IDs
    // are therefore only recorded when a bundle was actually opened.
    if (!ownedResource.isNull()) {
        f->setLocaleIDs(ures_getLocaleByType(ownedResource.getAlias(), ULOC_VALID_LOCALE, &status),
                        ures_getLocaleByType(ownedResource.getAlias(), ULOC_ACTUAL_LOCALE, &status));
    }
    if (U_FAILURE(status)) {
        delete f;
        return NULL;
    }
    return f;
}
/*
 * Builds a filtered break iterator from the strings collected in fSet.
 *
 * Two tries are built from the set:
 *  - a backwards trie holding reversed strings (e.g. ".srM" for "Mrs."), and
 *  - a forwards trie holding the strings that contain a full stop before their end
 *    ("partials", e.g. "a.M."); for those only the reversed prefix up to and including
 *    the first full stop goes into the backwards trie, tagged kPARTIAL.
 *
 * @param adoptBreakIterator break iterator to wrap; always adopted, i.e. it is deleted here
 *                           if no result object is created
 * @param status             ICU error code; nothing is built if it already holds a failure
 * @return the new break iterator, or NULL on failure
 */
BreakIterator *
SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UErrorCode& status) {
  LocalPointer<BreakIterator> adopt(adoptBreakIterator);

  if(U_FAILURE(status)) {
    return NULL;
  }

  LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status));
  LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status));

  int32_t revCount = 0;
  int32_t fwdCount = 0;

  int32_t subCount = fSet.size();
  LocalArray<UnicodeString> ustrs(new UnicodeString[subCount]);
  LocalArray<int> partials(new int[subCount]);

  // Every allocation above is dereferenced below, so none of them may be NULL.
  if(builder.isNull() || builder2.isNull() || ustrs.isNull() || partials.isNull()) {
    status = U_MEMORY_ALLOCATION_ERROR;
    return NULL;
  }
  if(U_FAILURE(status)) {
    return NULL; // a trie builder could not be initialized
  }

  LocalPointer<UCharsTrie>    backwardsTrie; //  i.e. ".srM" for Mrs.
  LocalPointer<UCharsTrie>    forwardsPartialTrie; //  Has ".a" for "a.M."

  // Copy the set into an indexable array; partials[] holds per-string flags.
  int n=0;
  for ( set<UnicodeString>::iterator i = fSet.begin();
        i != fSet.end();
        i++) {
    const UnicodeString &abbr = *i;
    ustrs[n] = abbr;
    partials[n] = 0; // default: not partial
    n++;
  }

  // first pass - find partials.
  for(int i=0;i<subCount;i++) {
    int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations
    if(nn>-1 && (nn+1)!=ustrs[i].length()) {
      // is partial (the full stop is not the last character).
      // is it unique?
      int sameAs = -1;
      for(int j=0;j<subCount;j++) {
        if(j==i) continue;
        if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) {
          // ustrs[j] starts with the same prefix (up to and including the full stop).
          if(partials[j]==0) { // hasn't been processed yet
            partials[j] = kSuppressInReverse | kAddToForward;
          } else if(partials[j] & kSuppressInReverse) {
            sameAs = j; // the other entry is already in the reverse table.
          }
        }
      }
      UnicodeString prefix(ustrs[i], 0, nn+1);
      if(sameAs == -1 && partials[i] == 0) {
        // first one - add the prefix to the reverse table.
        prefix.reverse();
        builder->add(prefix, kPARTIAL, status);
        revCount++;
        partials[i] = kSuppressInReverse | kAddToForward;
      }
      // otherwise the prefix is already covered; nothing to add for this string here.
    }
  }

  // second pass - add whole strings to the reverse or the forward table.
  for(int i=0;i<subCount;i++) {
    if(partials[i]==0) {
      ustrs[i].reverse();
      builder->add(ustrs[i], kMATCH, status);
      revCount++;
    } else {
      // an optimization would be to only add the portion after the '.'
      // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward,
      // instead of "Ph.D." since we already know the "Ph." part is a match.
      // would need the trie to be able to hold 0-length strings, though.
      builder2->add(ustrs[i], kMATCH, status); // forward
      fwdCount++;
    }
  }

  if(revCount>0) {
    backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status));
    if(U_FAILURE(status)) {
      return NULL;
    }
  }

  if(fwdCount>0) {
    forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status));
    if(U_FAILURE(status)) {
      return NULL;
    }
  }

  // Keep ownership in the LocalPointers until the result object exists, so that nothing
  // leaks if its allocation fails.
  BreakIterator *result = new ULISentenceBreakIterator(adopt.getAlias(), forwardsPartialTrie.getAlias(), backwardsTrie.getAlias(), status);
  if(result == NULL) {
    status = U_MEMORY_ALLOCATION_ERROR;
    return NULL;
  }
  // The result object has taken over the three objects.
  adopt.orphan();
  forwardsPartialTrie.orphan();
  backwardsTrie.orphan();
  return result;
}