/**
 * Appends the [start, end) slice of `input` to this group's runs.
 *
 * The slice is converted to UTF-8 and stored together with the group's
 * script_ and lang_ and the result of uciDirectionToHB(direction).
 */
void TextGroup::addRun(const UnicodeString &input, UBiDiDirection direction, int32_t start, int32_t end)
{
    const int32_t runLength = end - start;

    std::string utf8Run;
    input.tempSubString(start, runLength).toUTF8String(utf8Run);

    // Trace output: the run's text, then the first code unit of the whole input.
    printf("Hominlinx-->======TextGroup::addRun[%s]==== %d\n", utf8Run.c_str(), input.charAt(0));

    runs_.emplace_back(utf8Run, script_, lang_, uciDirectionToHB(direction));
}
// populatePrefixSuffix Adds a specific prefix-suffix pair to result for a // given variant and log10 value. // variant is 'zero', 'one', 'two', 'few', 'many', or 'other'. // formatStr is the format string from which the prefix and suffix are // extracted. It is usually of form 'Pefix 000 suffix'. // populatePrefixSuffix returns the number of 0's found in formatStr // before the decimal point. // In the special case that formatStr contains only spaces for prefix // and suffix, populatePrefixSuffix returns log10Value + 1. static int32_t populatePrefixSuffix( const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status) { if (U_FAILURE(status)) { return 0; } int32_t firstIdx = formatStr.indexOf(kZero, UPRV_LENGTHOF(kZero), 0); // We must have 0's in format string. if (firstIdx == -1) { status = U_INTERNAL_PROGRAM_ERROR; return 0; } int32_t lastIdx = formatStr.lastIndexOf(kZero, UPRV_LENGTHOF(kZero), firstIdx); CDFUnit* unit = createCDFUnit(variant, log10Value, result, status); if (U_FAILURE(status)) { return 0; } // Return -1 if we are not overwriting an existing value if (unit->isSet() && !overwrite) { return -1; } unit->markAsSet(); // Everything up to first 0 is the prefix unit->prefix = formatStr.tempSubString(0, firstIdx); fixQuotes(unit->prefix); // Everything beyond the last 0 is the suffix unit->suffix = formatStr.tempSubString(lastIdx + 1); fixQuotes(unit->suffix); // If there is effectively no prefix or suffix, ignore the actual number of // 0's and act as if the number of 0's matches the size of the number. if (onlySpaces(unit->prefix) && onlySpaces(unit->suffix)) { return log10Value + 1; } // Calculate number of zeros before decimal point int32_t idx = firstIdx + 1; while (idx <= lastIdx && formatStr.charAt(idx) == u_0) { ++idx; } return (idx - firstIdx); }
/* * EACH RUN MUST BE ON ITS OWN LINE BECAUSE IN XCode'S CONSOLE IS RE-BIDIZING THE OUTPUT */ void Test::spitRun(const UnicodeString &text, UBiDiDirection direction, int32_t start, int32_t end) { std::string tmp; text.tempSubString(start, end - start).toUTF8String(tmp); #ifdef SPIT_DETAILS output << ((direction == UBIDI_RTL) ? "RTL " : "") << "[" << start << " | " << end << "]" << endl; #endif output << tmp << endl; #ifdef SPIT_DETAILS output << " " << endl; #endif }
void CompactDecimalFormatTest::TestFieldPosition() { // Swahili uses prefixes which forces offsets in field position to change UErrorCode status = U_ZERO_ERROR; LocalPointer<CompactDecimalFormat> cdf(createCDFInstance("sw", UNUM_SHORT, status)); if (U_FAILURE(status)) { dataerrln("Unable to create format object - %s", u_errorName(status)); return; } FieldPosition fp(UNUM_INTEGER_FIELD); UnicodeString result; cdf->format(1234567.0, result, fp); UnicodeString subString = result.tempSubString(fp.getBeginIndex(), fp.getEndIndex() - fp.getBeginIndex()); if (subString != UnicodeString("1", -1, US_INV)) { errln(UnicodeString("Expected 1, got ") + subString); } }
// Returns the substring of `strings` described by stringOffset: the code unit at
// stringOffset is read as the length, and the characters start right after it.
UnicodeString getString(const UnicodeString &strings) const {
    const int32_t charCount = strings[stringOffset];
    const int32_t firstChar = stringOffset + 1;
    return strings.tempSubString(firstChar, charCount);
}
//---------------------------------------------------------------------------- // // main for gendict // //---------------------------------------------------------------------------- int main(int argc, char **argv) { // // Pick up and check the command line arguments, // using the standard ICU tool utils option handling. // U_MAIN_INIT_ARGS(argc, argv); progName = argv[0]; argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); if(argc<0) { // Unrecognized option fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); } if(options[ARG_HELP].doesOccur || options[ARG_QMARK].doesOccur) { // -? or -h for help. usageAndDie(U_ZERO_ERROR); } UBool verbose = options[ARG_VERBOSE].doesOccur; if (argc < 3) { fprintf(stderr, "input and output file must both be specified.\n"); usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); } const char *outFileName = argv[2]; const char *wordFileName = argv[1]; startTime = uprv_getRawUTCtime(); // initialize start timer if (options[ARG_ICUDATADIR].doesOccur) { u_setDataDirectory(options[ARG_ICUDATADIR].value); } const char *copyright = NULL; if (options[ARG_COPYRIGHT].doesOccur) { copyright = U_COPYRIGHT_STRING; } if (options[ARG_UCHARS].doesOccur == options[ARG_BYTES].doesOccur) { fprintf(stderr, "you must specify exactly one type of trie to output!\n"); usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); } UBool isBytesTrie = options[ARG_BYTES].doesOccur; if (isBytesTrie != options[ARG_TRANSFORM].doesOccur) { fprintf(stderr, "you must provide a transformation for a bytes trie, and must not provide one for a uchars trie!\n"); usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); } IcuToolErrorCode status("gendict/main()"); #if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO const char* outDir=NULL; UNewDataMemory *pData; char msg[1024]; UErrorCode tempstatus = U_ZERO_ERROR; /* write message with just the name */ // potential for a buffer overflow here... 
sprintf(msg, "gendict writes dummy %s because of UCONFIG_NO_BREAK_ITERATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName); fprintf(stderr, "%s\n", msg); /* write the dummy data file */ pData = udata_create(outDir, NULL, outFileName, &dataInfo, NULL, &tempstatus); udata_writeBlock(pData, msg, strlen(msg)); udata_finish(pData, &tempstatus); return (int)tempstatus; #else // Read in the dictionary source file if (verbose) { printf("Opening file %s...\n", wordFileName); } const char *codepage = "UTF-8"; UCHARBUF *f = ucbuf_open(wordFileName, &codepage, TRUE, FALSE, status); if (status.isFailure()) { fprintf(stderr, "error opening input file: ICU Error \"%s\"\n", status.errorName()); exit(status.reset()); } if (verbose) { printf("Initializing dictionary builder of type %s...\n", (isBytesTrie ? "BytesTrie" : "UCharsTrie")); } DataDict dict(isBytesTrie, status); if (status.isFailure()) { fprintf(stderr, "new DataDict: ICU Error \"%s\"\n", status.errorName()); exit(status.reset()); } if (options[ARG_TRANSFORM].doesOccur) { dict.setTransform(options[ARG_TRANSFORM].value); } UnicodeString fileLine; if (verbose) { puts("Adding words to dictionary..."); } UBool hasValues = FALSE; UBool hasValuelessContents = FALSE; int lineCount = 0; int wordCount = 0; int minlen = 255; int maxlen = 0; UBool isOk = TRUE; while (readLine(f, fileLine, status)) { lineCount++; if (fileLine.isEmpty()) continue; // Parse word [spaces value]. 
int32_t keyLen; for (keyLen = 0; keyLen < fileLine.length() && !u_isspace(fileLine[keyLen]); ++keyLen) {} if (keyLen == 0) { fprintf(stderr, "Error: no word on line %i!\n", lineCount); isOk = FALSE; continue; } int32_t valueStart; for (valueStart = keyLen; valueStart < fileLine.length() && u_isspace(fileLine[valueStart]); ++valueStart) {} if (keyLen < valueStart) { int32_t valueLength = fileLine.length() - valueStart; if (valueLength > 15) { fprintf(stderr, "Error: value too long on line %i!\n", lineCount); isOk = FALSE; continue; } char s[16]; fileLine.extract(valueStart, valueLength, s, 16, US_INV); char *end; unsigned long value = uprv_strtoul(s, &end, 0); if (end == s || *end != 0 || (int32_t)uprv_strlen(s) != valueLength || value > 0xffffffff) { fprintf(stderr, "Error: value syntax error or value too large on line %i!\n", lineCount); isOk = FALSE; continue; } dict.addWord(fileLine.tempSubString(0, keyLen), (int32_t)value, status); hasValues = TRUE; wordCount++; if (keyLen < minlen) minlen = keyLen; if (keyLen > maxlen) maxlen = keyLen; } else { dict.addWord(fileLine.tempSubString(0, keyLen), 0, status); hasValuelessContents = TRUE; wordCount++; if (keyLen < minlen) minlen = keyLen; if (keyLen > maxlen) maxlen = keyLen; } if (status.isFailure()) { fprintf(stderr, "ICU Error \"%s\": Failed to add word to trie at input line %d in input file\n", status.errorName(), lineCount); exit(status.reset()); } } if (verbose) { printf("Processed %d lines, added %d words, minlen %d, maxlen %d\n", lineCount, wordCount, minlen, maxlen); } if (!isOk && status.isSuccess()) { status.set(U_ILLEGAL_ARGUMENT_ERROR); } if (hasValues && hasValuelessContents) { fprintf(stderr, "warning: file contained both valued and unvalued strings!\n"); } if (verbose) { printf("Serializing data...isBytesTrie? 
%d\n", isBytesTrie); } int32_t outDataSize; const void *outData; UnicodeString usp; if (isBytesTrie) { StringPiece sp = dict.serializeBytes(status); outDataSize = sp.size(); outData = sp.data(); } else { dict.serializeUChars(usp, status); outDataSize = usp.length() * U_SIZEOF_UCHAR; outData = usp.getBuffer(); } if (status.isFailure()) { fprintf(stderr, "gendict: got failure of type %s while serializing, if U_ILLEGAL_ARGUMENT_ERROR possibly due to duplicate dictionary entries\n", status.errorName()); exit(status.reset()); } if (verbose) { puts("Opening output file..."); } UNewDataMemory *pData = udata_create(NULL, NULL, outFileName, &dataInfo, copyright, status); if (status.isFailure()) { fprintf(stderr, "gendict: could not open output file \"%s\", \"%s\"\n", outFileName, status.errorName()); exit(status.reset()); } if (verbose) { puts("Writing to output file..."); } int32_t indexes[DictionaryData::IX_COUNT] = { DictionaryData::IX_COUNT * sizeof(int32_t), 0, 0, 0, 0, 0, 0, 0 }; int32_t size = outDataSize + indexes[DictionaryData::IX_STRING_TRIE_OFFSET]; indexes[DictionaryData::IX_RESERVED1_OFFSET] = size; indexes[DictionaryData::IX_RESERVED2_OFFSET] = size; indexes[DictionaryData::IX_TOTAL_SIZE] = size; indexes[DictionaryData::IX_TRIE_TYPE] = isBytesTrie ? 
DictionaryData::TRIE_TYPE_BYTES : DictionaryData::TRIE_TYPE_UCHARS; if (hasValues) { indexes[DictionaryData::IX_TRIE_TYPE] |= DictionaryData::TRIE_HAS_VALUES; } indexes[DictionaryData::IX_TRANSFORM] = dict.getTransform(); udata_writeBlock(pData, indexes, sizeof(indexes)); udata_writeBlock(pData, outData, outDataSize); size_t bytesWritten = udata_finish(pData, status); if (status.isFailure()) { fprintf(stderr, "gendict: error \"%s\" writing the output file\n", status.errorName()); exit(status.reset()); } if (bytesWritten != (size_t)size) { fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName); exit(U_INTERNAL_PROGRAM_ERROR); } printf("%s: done writing\t%s (%ds).\n", progName, outFileName, elapsedTime()); #ifdef TEST_GENDICT if (isBytesTrie) { BytesTrie::Iterator it(outData, outDataSize, status); while (it.hasNext()) { it.next(status); const StringPiece s = it.getString(); int32_t val = it.getValue(); printf("%s -> %i\n", s.data(), val); } } else { UCharsTrie::Iterator it((const UChar *)outData, outDataSize, status); while (it.hasNext()) { it.next(status); const UnicodeString s = it.getString(); int32_t val = it.getValue(); char tmp[1024]; s.extract(0, s.length(), tmp, 1024); printf("%s -> %i\n", tmp, val); } } #endif return 0; #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ }
// Creates a NumberFormat for desiredLocale in the requested style.
//
//   desiredLocale       - locale whose numbering system, symbols and pattern are used.
//   style               - must be in [0, UNUM_FORMAT_STYLE_COUNT) and accepted by
//                         isStyleSupported(); otherwise status is set to
//                         U_ILLEGAL_ARGUMENT_ERROR or U_UNSUPPORTED_ERROR.
//   mustBeDecimalFormat - when true, the Win32 host-format shortcut is skipped and an
//                         algorithmic numbering system is rejected with
//                         U_UNSUPPORTED_ERROR.
//   status              - in/out error code; nothing is done if it is already a failure.
//
// Returns a newly allocated RuleBasedNumberFormat (algorithmic numbering system) or
// DecimalFormat (otherwise); on the Win32-only path a Win32NumberFormat may be returned
// instead. Returns NULL whenever status ends up as a failure.
NumberFormat*
NumberFormat::makeInstance(const Locale& desiredLocale,
                           UNumberFormatStyle style,
                           UBool mustBeDecimalFormat,
                           UErrorCode& status) {
    if (U_FAILURE(status)) return NULL;

    if (style < 0 || style >= UNUM_FORMAT_STYLE_COUNT) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    // Some styles are not supported. This is a result of merging
    // the @draft ICU 4.2 NumberFormat::EStyles into the long-existing UNumberFormatStyle.
    // Ticket #8503 is for reviewing/fixing/merging the two relevant implementations:
    // this one and unum_open().
    // The UNUM_PATTERN_ styles are not supported here
    // because this method does not take a pattern string.
    if (!isStyleSupported(style)) {
        status = U_UNSUPPORTED_ERROR;
        return NULL;
    }

#if U_PLATFORM_USES_ONLY_WIN32_API
    if (!mustBeDecimalFormat) {
        char buffer[8];
        int32_t count = desiredLocale.getKeywordValue("compat", buffer, sizeof(buffer), status);

        // if the locale has "@compat=host", create a host-specific NumberFormat
        if (U_SUCCESS(status) && count > 0 && uprv_strcmp(buffer, "host") == 0) {
            Win32NumberFormat *f = NULL;
            UBool curr = TRUE;

            switch (style) {
            case UNUM_DECIMAL:
                curr = FALSE;
                // fall-through

            case UNUM_CURRENCY:
            case UNUM_CURRENCY_ISO: // do not support plural formatting here
            case UNUM_CURRENCY_PLURAL:
                f = new Win32NumberFormat(desiredLocale, curr, status);

                if (U_SUCCESS(status)) {
                    return f;
                }

                // Construction failed: discard the object and continue below.
                delete f;
                break;
            default:
                break;
            }
        }
    }
#endif
    // Use numbering system cache hashtable
    umtx_initOnce(gNSCacheInitOnce, &nscacheInit);

    // Get cached numbering system
    // ownedNs only takes ownership when no cache exists; a numbering system that
    // comes from (or is put into) the cache is not adopted by this function.
    LocalPointer<NumberingSystem> ownedNs;
    NumberingSystem *ns = NULL;
    if (NumberingSystem_cache != NULL) {
        // TODO: Bad hash key usage, see ticket #8504.
        int32_t hashKey = desiredLocale.hashCode();

        // Lookup and insertion happen while holding nscacheMutex.
        Mutex lock(&nscacheMutex);
        ns = (NumberingSystem *)uhash_iget(NumberingSystem_cache, hashKey);
        if (ns == NULL) {
            ns = NumberingSystem::createInstance(desiredLocale,status);
            uhash_iput(NumberingSystem_cache, hashKey, (void*)ns, &status);
        }
    } else {
        ownedNs.adoptInstead(NumberingSystem::createInstance(desiredLocale,status));
        ns = ownedNs.getAlias();
    }

    // check results of getting a numbering system
    if (U_FAILURE(status)) {
        return NULL;
    }

    if (mustBeDecimalFormat && ns->isAlgorithmic()) {
        status = U_UNSUPPORTED_ERROR;
        return NULL;
    }

    LocalPointer<DecimalFormatSymbols> symbolsToAdopt;
    UnicodeString pattern;
    LocalUResourceBundlePointer ownedResource(ures_open(NULL, desiredLocale.getName(), &status));
    if (U_FAILURE(status)) {
        // We don't appear to have resource data available -- use the last-resort data
        status = U_USING_FALLBACK_WARNING;
        // When the data is unavailable, and locale isn't passed in, last resort data is used.
        symbolsToAdopt.adoptInstead(new DecimalFormatSymbols(status));
        if (symbolsToAdopt.isNull()) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }

        // Creates a DecimalFormat instance with the last resort number patterns.
        pattern.setTo(TRUE, gLastResortNumberPatterns[style], -1);
    }
    else {
        // Loads the decimal symbols of the desired locale.
        symbolsToAdopt.adoptInstead(new DecimalFormatSymbols(desiredLocale, status));
        if (symbolsToAdopt.isNull()) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }

        // Take the bundle out of ownedResource while it is passed through the
        // fill-in calls below; it is handed back via adoptInstead() afterwards.
        UResourceBundle *resource = ownedResource.orphan();
        UResourceBundle *numElements = ures_getByKeyWithFallback(resource, gNumberElements, NULL, &status);
        resource = ures_getByKeyWithFallback(numElements, ns->getName(), resource, &status);
        resource = ures_getByKeyWithFallback(resource, gPatterns, resource, &status);
        ownedResource.adoptInstead(resource);

        int32_t patLen = 0;
        const UChar *patResStr = ures_getStringByKeyWithFallback(resource, gFormatKeys[style], &patLen, &status);

        // Didn't find a pattern specific to the numbering system, so fall back to "latn"
        if ( status == U_MISSING_RESOURCE_ERROR && uprv_strcmp(gLatn,ns->getName())) {
            status = U_ZERO_ERROR;
            resource = ures_getByKeyWithFallback(numElements, gLatn, resource, &status);
            resource = ures_getByKeyWithFallback(resource, gPatterns, resource, &status);
            patResStr = ures_getStringByKeyWithFallback(resource, gFormatKeys[style], &patLen, &status);
        }

        ures_close(numElements);

        // Creates the specified decimal format style of the desired locale.
        pattern.setTo(TRUE, patResStr, patLen);
    }
    if (U_FAILURE(status)) {
        return NULL;
    }
    // For the two currency styles, a currency pattern supplied by the symbols
    // (when non-NULL) replaces the pattern chosen above.
    if(style==UNUM_CURRENCY || style == UNUM_CURRENCY_ISO){
        const UChar* currPattern = symbolsToAdopt->getCurrencyPattern();
        if(currPattern!=NULL){
            pattern.setTo(currPattern, u_strlen(currPattern));
        }
    }


    NumberFormat *f;
    if (ns->isAlgorithmic()) {
        UnicodeString nsDesc;
        UnicodeString nsRuleSetGroup;
        UnicodeString nsRuleSetName;
        Locale nsLoc;
        URBNFRuleSetTag desiredRulesType = URBNF_NUMBERING_SYSTEM;

        // The description is split at its first and last slash into
        // <locale id>/<rule set group>/<rule set name>; with fewer than two
        // distinct slashes the whole description is used as the rule set name.
        nsDesc.setTo(ns->getDescription());
        int32_t firstSlash = nsDesc.indexOf(gSlash);
        int32_t lastSlash = nsDesc.lastIndexOf(gSlash);
        if ( lastSlash > firstSlash ) {
            CharString nsLocID;

            nsLocID.appendInvariantChars(nsDesc.tempSubString(0, firstSlash), status);
            nsRuleSetGroup.setTo(nsDesc,firstSlash+1,lastSlash-firstSlash-1);
            nsRuleSetName.setTo(nsDesc,lastSlash+1);

            nsLoc = Locale::createFromName(nsLocID.data());

            UnicodeString SpelloutRules = UNICODE_STRING_SIMPLE("SpelloutRules");
            if ( nsRuleSetGroup.compare(SpelloutRules) == 0 ) {
                desiredRulesType = URBNF_SPELLOUT;
            }
        } else {
            nsLoc = desiredLocale;
            nsRuleSetName.setTo(nsDesc);
        }

        RuleBasedNumberFormat *r = new RuleBasedNumberFormat(desiredRulesType,nsLoc,status);
        if (r == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        r->setDefaultRuleSet(nsRuleSetName,status);
        f = r;
    } else {
        // replace single currency sign in the pattern with double currency sign
        // if the style is UNUM_CURRENCY_ISO
        if (style == UNUM_CURRENCY_ISO) {
            pattern.findAndReplace(UnicodeString(TRUE, gSingleCurrencySign, 1),
                                   UnicodeString(TRUE, gDoubleCurrencySign, 2));
        }

        // "new DecimalFormat()" does not adopt the symbols if its memory allocation fails.
        DecimalFormatSymbols *syms = symbolsToAdopt.orphan();
        f = new DecimalFormat(pattern, syms, style, status);
        if (f == NULL) {
            delete syms;
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
    }

    // NOTE(review): on the last-resort path above ures_open() failed, so
    // ownedResource presumably holds NULL here -- confirm how
    // ures_getLocaleByType() treats a NULL bundle.
    f->setLocaleIDs(ures_getLocaleByType(ownedResource.getAlias(), ULOC_VALID_LOCALE, &status),
                    ures_getLocaleByType(ownedResource.getAlias(), ULOC_ACTUAL_LOCALE, &status));
    if (U_FAILURE(status)) {
        delete f;
        return NULL;
    }
    return f;
}