// Reports the full length of s as the quick-check span.
// The error-code argument is accepted but never consulted.
virtual int32_t spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
    const int32_t wholeLength = s.length();
    return wholeLength;
}
/**
 * Tells whether nextSpec currently holds any text.
 * @return TRUE when nextSpec is non-empty, FALSE otherwise.
 */
UBool TransliteratorSpec::hasFallback() const {
    if (nextSpec.length() == 0) {
        return FALSE;
    }
    return TRUE;
}
/**
 * Instantiates the transliterator described by this alias.
 *
 * SIMPLE aliases are created directly from the ID held in aliasesOrRules
 * and, if a compound filter is present, receive a clone of it.
 * COMPOUND aliases are assembled from the ID blocks in aliasesOrRules and
 * the anonymous transliterators held in `transes'; U+FFFF in
 * aliasesOrRules marks each position where an anonymous transliterator
 * goes.  Note that the COMPOUND path consumes aliasesOrRules and empties
 * `transes'.
 * RULES aliases must not be passed to this function.
 *
 * @param pe  receives parse error details from the factory calls
 * @param ec  in/out error code; nothing is done if it already indicates
 *            failure.  Set to U_MEMORY_ALLOCATION_ERROR if the compound
 *            transliterator cannot be allocated.
 * @return the new transliterator, or 0 on failure
 */
Transliterator* TransliteratorAlias::create(UParseError& pe,
                                            UErrorCode& ec) {
    if (U_FAILURE(ec)) {
        return 0;
    }
    Transliterator *t = NULL;
    switch (type) {
    case SIMPLE:
        t = Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec);
        if (U_FAILURE(ec)) {
            return 0;
        }
        // Defensive: never dereference a NULL result, even if the factory
        // did not report a failure code.
        if (t != 0 && compoundFilter != 0) {
            t->adoptFilter((UnicodeSet*)compoundFilter->clone());
        }
        break;
    case COMPOUND:
        {
            // Number of anonymous transliterators before any are orphaned
            // out of `transes' below.
            int32_t anonymousRBTs = transes->size();

            UVector transliterators(ec);
            UnicodeString idBlock;

            // Split aliasesOrRules at each U+FFFF marker.  Every non-empty
            // ID block becomes a transliterator, followed by the next
            // anonymous transliterator (if any remain).
            int32_t blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff));
            while (blockSeparatorPos >= 0) {
                aliasesOrRules.extract(0, blockSeparatorPos, idBlock);
                aliasesOrRules.remove(0, blockSeparatorPos + 1);
                if (!idBlock.isEmpty()) {
                    transliterators.addElement(
                        Transliterator::createInstance(idBlock, UTRANS_FORWARD, pe, ec), ec);
                }
                if (!transes->isEmpty()) {
                    transliterators.addElement(transes->orphanElementAt(0), ec);
                }
                blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff));
            }
            // Trailing ID block after the last marker
            if (!aliasesOrRules.isEmpty()) {
                transliterators.addElement(
                    Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec), ec);
            }
            // Any anonymous transliterators that had no marker
            while (!transes->isEmpty()) {
                transliterators.addElement(transes->orphanElementAt(0), ec);
            }

            if (U_SUCCESS(ec)) {
                t = new CompoundTransliterator(ID, transliterators,
                        (compoundFilter ? (UnicodeSet*)(compoundFilter->clone()) : 0),
                        anonymousRBTs, pe, ec);
                if (t == 0) {
                    ec = U_MEMORY_ALLOCATION_ERROR;
                }
            }
            if (t == 0) {
                // No compound transliterator was built (earlier failure or
                // allocation failure), so the collected pieces are still
                // ours to release.
                for (int32_t i = 0; i < transliterators.size(); i++) {
                    delete (Transliterator*)(transliterators.elementAt(i));
                }
            }
        }
        break;
    case RULES:
        U_ASSERT(FALSE); // don't call create() if isRuleBased() returns TRUE!
        break;
    }
    return t;
}
/**
 * Parse the pattern from the given RuleCharacterIterator.  The
 * iterator is advanced over the parsed pattern.  This set is cleared
 * first and then rebuilt from the pattern.
 * @param chars iterator over the pattern characters.  Upon return
 * it will be advanced to the first character after the parsed
 * pattern, or the end of the iteration if all characters are
 * parsed.
 * @param symbols symbol table to use to parse and dereference
 * variables, or null if none.
 * @param rebuiltPat the pattern that was parsed, rebuilt or
 * copied from the input pattern, as appropriate.  The result is
 * appended to this string.
 * @param options a bit mask of zero or more of the following:
 * IGNORE_SPACE, CASE.
 * @param caseClosure member function invoked on this set with
 * USET_CASE_INSENSITIVE or USET_ADD_CASE_MAPPINGS when the matching
 * bit is set in options; not called otherwise.
 * @param ec in/out error code.  Nothing is done if it already
 * indicates failure.  Set to U_MALFORMED_SET on a syntax error and to
 * U_MEMORY_ALLOCATION_ERROR if the scratch set cannot be allocated or
 * this set ends up bogus.
 */
void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
                              const SymbolTable* symbols,
                              UnicodeString& rebuiltPat,
                              uint32_t options,
                              UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
                              UErrorCode& ec) {
    if (U_FAILURE(ec)) return;

    // Syntax characters: [ ] ^ - & { }

    // Recognized special forms for chars, sets: c-c s-s s&s

    int32_t opts = RuleCharacterIterator::PARSE_VARIABLES |
                   RuleCharacterIterator::PARSE_ESCAPES;
    if ((options & USET_IGNORE_SPACE) != 0) {
        opts |= RuleCharacterIterator::SKIP_WHITESPACE;
    }

    // patLocal accumulates the pattern text as it is parsed; buf holds
    // the contents of a multicharacter string "{...}".
    UnicodeString patLocal, buf;
    // usePat: TRUE once patLocal (rather than a generated pattern) must
    // be used for rebuiltPat.
    UBool usePat = FALSE;
    // Lazily allocated set used for nested sets that are parsed inline.
    UnicodeSetPointer scratch;
    RuleCharacterIterator::Pos backup;

    // mode: 0=before [, 1=between [...], 2=after ]
    // lastItem: 0=none, 1=char, 2=set
    int8_t lastItem = 0, mode = 0;
    UChar32 lastChar = 0;
    // Pending operator: 0, HYPHEN, or INTERSECTION
    UChar op = 0;

    UBool invert = FALSE;

    clear();

    while (mode != 2 && !chars.atEnd()) {
        U_ASSERT((lastItem == 0 && op == 0) ||
                 (lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) ||
                 (lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ ||
                                    op == INTERSECTION /*'&'*/)));

        UChar32 c = 0;
        UBool literal = FALSE;
        UnicodeSet* nested = 0; // alias - do not delete

        // -------- Check for property pattern

        // setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed
        int8_t setMode = 0;
        if (resemblesPropertyPattern(chars, opts)) {
            setMode = 2;
        }

        // -------- Parse '[' of opening delimiter OR nested set.
        // If there is a nested set, use `setMode' to define how
        // the set should be parsed.  If the '[' is part of the
        // opening delimiter for this pattern, parse special
        // strings "[", "[^", "[-", and "[^-".  Check for stand-in
        // characters representing a nested set in the symbol
        // table.

        else {
            // Prepare to backup if necessary
            chars.getPos(backup);
            c = chars.next(opts, literal, ec);
            if (U_FAILURE(ec)) return;

            if (c == 0x5B /*'['*/ && !literal) {
                if (mode == 1) {
                    // '[' inside the brackets starts a nested set; rewind
                    // so the recursive call sees the '['.
                    chars.setPos(backup); // backup
                    setMode = 1;
                } else {
                    // Handle opening '[' delimiter
                    mode = 1;
                    patLocal.append((UChar) 0x5B /*'['*/);
                    chars.getPos(backup); // prepare to backup
                    c = chars.next(opts, literal, ec);
                    if (U_FAILURE(ec)) return;
                    if (c == 0x5E /*'^'*/ && !literal) {
                        invert = TRUE;
                        patLocal.append((UChar) 0x5E /*'^'*/);
                        chars.getPos(backup); // prepare to backup
                        c = chars.next(opts, literal, ec);
                        if (U_FAILURE(ec)) return;
                    }
                    // Fall through to handle special leading '-';
                    // otherwise restart loop for nested [], \p{}, etc.
                    if (c == HYPHEN /*'-'*/) {
                        literal = TRUE;
                        // Fall through to handle literal '-' below
                    } else {
                        chars.setPos(backup); // backup
                        continue;
                    }
                }
            } else if (symbols != 0) {
                const UnicodeFunctor *m = symbols->lookupMatcher(c);
                if (m != 0) {
                    const UnicodeSet *ms = dynamic_cast<const UnicodeSet *>(m);
                    if (ms == NULL) {
                        // The stand-in resolves to something that is not a set
                        ec = U_MALFORMED_SET;
                        return;
                    }
                    // casting away const, but `nested' won't be modified
                    // (important not to modify stored set)
                    nested = const_cast<UnicodeSet*>(ms);
                    setMode = 3;
                }
            }
        }

        // -------- Handle a nested set.  This either is inline in
        // the pattern or represented by a stand-in that has
        // previously been parsed and was looked up in the symbol
        // table.

        if (setMode != 0) {
            if (lastItem == 1) {
                if (op != 0) {
                    // syntaxError(chars, "Char expected after operator");
                    ec = U_MALFORMED_SET;
                    return;
                }
                // Flush the pending single character before the set
                add(lastChar, lastChar);
                _appendToPat(patLocal, lastChar, FALSE);
                lastItem = 0;
                op = 0;
            }

            if (op == HYPHEN /*'-'*/ || op == INTERSECTION /*'&'*/) {
                patLocal.append(op);
            }

            if (nested == 0) {
                // lazy allocation
                if (!scratch.allocate()) {
                    ec = U_MEMORY_ALLOCATION_ERROR;
                    return;
                }
                nested = scratch.pointer();
            }
            switch (setMode) {
            case 1:
                // Inline nested set: parse recursively with the same options
                nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec);
                break;
            case 2:
                chars.skipIgnored(opts);
                nested->applyPropertyPattern(chars, patLocal, ec);
                if (U_FAILURE(ec)) return;
                break;
            case 3: // `nested' already parsed
                nested->_toPattern(patLocal, FALSE);
                break;
            }

            usePat = TRUE;

            if (mode == 0) {
                // Entire pattern is a category; leave parse loop
                *this = *nested;
                mode = 2;
                break;
            }

            // Combine the nested set with this set according to the
            // pending operator.
            switch (op) {
            case HYPHEN: /*'-'*/
                removeAll(*nested);
                break;
            case INTERSECTION: /*'&'*/
                retainAll(*nested);
                break;
            case 0:
                addAll(*nested);
                break;
            }

            op = 0;
            lastItem = 2;

            continue;
        }

        if (mode == 0) {
            // syntaxError(chars, "Missing '['");
            ec = U_MALFORMED_SET;
            return;
        }

        // -------- Parse special (syntax) characters.  If the
        // current character is not special, or if it is escaped,
        // then fall through and handle it below.

        if (!literal) {
            switch (c) {
            case 0x5D /*']'*/:
                if (lastItem == 1) {
                    add(lastChar, lastChar);
                    _appendToPat(patLocal, lastChar, FALSE);
                }
                // Treat final trailing '-' as a literal
                if (op == HYPHEN /*'-'*/) {
                    add(op, op);
                    patLocal.append(op);
                } else if (op == INTERSECTION /*'&'*/) {
                    // syntaxError(chars, "Trailing '&'");
                    ec = U_MALFORMED_SET;
                    return;
                }
                patLocal.append((UChar) 0x5D /*']'*/);
                mode = 2;
                continue;
            case HYPHEN /*'-'*/:
                if (op == 0) {
                    if (lastItem != 0) {
                        // '-' after a char or set becomes the pending operator
                        op = (UChar) c;
                        continue;
                    } else {
                        // Treat final trailing '-' as a literal
                        add(c, c);
                        c = chars.next(opts, literal, ec);
                        if (U_FAILURE(ec)) return;
                        if (c == 0x5D /*']'*/ && !literal) {
                            patLocal.append(HYPHEN_RIGHT_BRACE, 2);
                            mode = 2;
                            continue;
                        }
                    }
                }
                // syntaxError(chars, "'-' not after char or set");
                ec = U_MALFORMED_SET;
                return;
            case INTERSECTION /*'&'*/:
                if (lastItem == 2 && op == 0) {
                    op = (UChar) c;
                    continue;
                }
                // syntaxError(chars, "'&' not after set");
                ec = U_MALFORMED_SET;
                return;
            case 0x5E /*'^'*/:
                // syntaxError(chars, "'^' not after '['");
                ec = U_MALFORMED_SET;
                return;
            case 0x7B /*'{'*/:
                if (op != 0) {
                    // syntaxError(chars, "Missing operand after operator");
                    ec = U_MALFORMED_SET;
                    return;
                }
                if (lastItem == 1) {
                    add(lastChar, lastChar);
                    _appendToPat(patLocal, lastChar, FALSE);
                }
                lastItem = 0;
                buf.truncate(0);
                {
                    // Collect everything up to the unescaped '}'
                    UBool ok = FALSE;
                    while (!chars.atEnd()) {
                        c = chars.next(opts, literal, ec);
                        if (U_FAILURE(ec)) return;
                        if (c == 0x7D /*'}'*/ && !literal) {
                            ok = TRUE;
                            break;
                        }
                        buf.append(c);
                    }
                    if (buf.length() < 1 || !ok) {
                        // syntaxError(chars, "Invalid multicharacter string");
                        ec = U_MALFORMED_SET;
                        return;
                    }
                }
                // We have new string. Add it to set and continue;
                // we don't need to drop through to the further
                // processing
                add(buf);
                patLocal.append((UChar) 0x7B /*'{'*/);
                _appendToPat(patLocal, buf, FALSE);
                patLocal.append((UChar) 0x7D /*'}'*/);
                continue;
            case SymbolTable::SYMBOL_REF:
                //         symbols  nosymbols
                // [a-$]   error    error (ambiguous)
                // [a$]    anchor   anchor
                // [a-$x]  var "x"* literal '$'
                // [a-$.]  error    literal '$'
                // *We won't get here in the case of var "x"
                {
                    chars.getPos(backup);
                    c = chars.next(opts, literal, ec);
                    if (U_FAILURE(ec)) return;
                    UBool anchor = (c == 0x5D /*']'*/ && !literal);
                    if (symbols == 0 && !anchor) {
                        c = SymbolTable::SYMBOL_REF;
                        chars.setPos(backup);
                        break; // literal '$'
                    }
                    if (anchor && op == 0) {
                        if (lastItem == 1) {
                            add(lastChar, lastChar);
                            _appendToPat(patLocal, lastChar, FALSE);
                        }
                        add(U_ETHER);
                        usePat = TRUE;
                        patLocal.append((UChar) SymbolTable::SYMBOL_REF);
                        patLocal.append((UChar) 0x5D /*']'*/);
                        mode = 2;
                        continue;
                    }
                    // syntaxError(chars, "Unquoted '$'");
                    ec = U_MALFORMED_SET;
                    return;
                }
            default:
                break;
            }
        }

        // -------- Parse literal characters.  This includes both
        // escaped chars ("\u4E01") and non-syntax characters
        // ("a").

        switch (lastItem) {
        case 0:
            lastItem = 1;
            lastChar = c;
            break;
        case 1:
            if (op == HYPHEN /*'-'*/) {
                if (lastChar >= c) {
                    // Don't allow redundant (a-a) or empty (b-a) ranges;
                    // these are most likely typos.
                    // syntaxError(chars, "Invalid range");
                    ec = U_MALFORMED_SET;
                    return;
                }
                add(lastChar, c);
                _appendToPat(patLocal, lastChar, FALSE);
                patLocal.append(op);
                _appendToPat(patLocal, c, FALSE);
                lastItem = 0;
                op = 0;
            } else {
                // Two characters in a row: commit the earlier one and
                // hold the new one as pending.
                add(lastChar, lastChar);
                _appendToPat(patLocal, lastChar, FALSE);
                lastChar = c;
            }
            break;
        case 2:
            if (op != 0) {
                // syntaxError(chars, "Set expected after operator");
                ec = U_MALFORMED_SET;
                return;
            }
            lastChar = c;
            lastItem = 1;
            break;
        }
    }

    if (mode != 2) {
        // syntaxError(chars, "Missing ']'");
        ec = U_MALFORMED_SET;
        return;
    }

    chars.skipIgnored(opts);

    /**
     * Handle global flags (invert, case insensitivity).  If this
     * pattern should be compiled case-insensitive, then we need
     * to close over case BEFORE COMPLEMENTING.  This makes
     * patterns like /[^abc]/i work.
     */
    if ((options & USET_CASE_INSENSITIVE) != 0) {
        (this->*caseClosure)(USET_CASE_INSENSITIVE);
    }
    else if ((options & USET_ADD_CASE_MAPPINGS) != 0) {
        (this->*caseClosure)(USET_ADD_CASE_MAPPINGS);
    }
    if (invert) {
        complement();
    }

    // Use the rebuilt pattern (patLocal) only if necessary.  Prefer the
    // generated pattern.
    if (usePat) {
        rebuiltPat.append(patLocal);
    } else {
        _generatePattern(rebuiltPat, FALSE);
    }
    if (isBogus() && U_SUCCESS(ec)) {
        // We likely ran out of memory. AHHH!
        ec = U_MEMORY_ALLOCATION_ERROR;
    }
}
/** * If in "by digits" mode, fills in the substitution one decimal digit * at a time using the rule set containing this substitution. * Otherwise, uses the superclass function. * @param number The number being formatted * @param toInsertInto The string to insert the result of formatting * the substitution into * @param pos The position of the owning rule's rule text in * toInsertInto */ void FractionalPartSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32_t _pos) const { // if we're not in "byDigits" mode, just use the inherited // doSubstitution() routine if (!byDigits) { NFSubstitution::doSubstitution(number, toInsertInto, _pos); // if we're in "byDigits" mode, transform the value into an integer // by moving the decimal point eight places to the right and // pulling digits off the right one at a time, formatting each digit // as an integer using this substitution's owning rule set // (this is slower, but more accurate, than doing it from the // other end) } else { // int32_t numberToFormat = (int32_t)uprv_round(transformNumber(number) * uprv_pow(10, kMaxDecimalDigits)); // // this flag keeps us from formatting trailing zeros. It starts // // out false because we're pulling from the right, and switches // // to true the first time we encounter a non-zero digit // UBool doZeros = FALSE; // for (int32_t i = 0; i < kMaxDecimalDigits; i++) { // int64_t digit = numberToFormat % 10; // if (digit != 0 || doZeros) { // if (doZeros && useSpaces) { // toInsertInto.insert(_pos + getPos(), gSpace); // } // doZeros = TRUE; // getRuleSet()->format(digit, toInsertInto, _pos + getPos()); // } // numberToFormat /= 10; // } DigitList dl; dl.set(number, 20, TRUE); UBool pad = FALSE; while (dl.fCount > (dl.fDecimalAt <= 0 ? 
0 : dl.fDecimalAt)) { if (pad && useSpaces) { toInsertInto.insert(_pos + getPos(), gSpace); } else { pad = TRUE; } getRuleSet()->format((int64_t)(dl.fDigits[--dl.fCount] - '0'), toInsertInto, _pos + getPos()); } while (dl.fDecimalAt < 0) { if (pad && useSpaces) { toInsertInto.insert(_pos + getPos(), gSpace); } else { pad = TRUE; } getRuleSet()->format((int64_t)0, toInsertInto, _pos + getPos()); ++dl.fDecimalAt; } if (!pad) { // hack around lack of precision in digitlist. if we would end up with // "foo point" make sure we add a " zero" to the end. getRuleSet()->format((int64_t)0, toInsertInto, _pos + getPos()); } } }
/**
 * Tests whether the text at pos is BACKSLASH followed by LOWER_P or
 * UPPER_P.  The second character is only examined when the first matches.
 */
static inline UBool
isPerlOpen(const UnicodeString &pattern, int32_t pos) {
    if (pattern.charAt(pos) != BACKSLASH) {
        return FALSE;
    }
    const UChar following = pattern.charAt(pos+1);
    return following == LOWER_P || following == UPPER_P;
}
/**
 * Tests whether the text at pos is SET_OPEN immediately followed by
 * COLON.  The second character is only examined when the first matches.
 */
static inline UBool
isPOSIXOpen(const UnicodeString &pattern, int32_t pos) {
    if (pattern.charAt(pos) != SET_OPEN) {
        return FALSE;
    }
    return pattern.charAt(pos+1) == COLON;
}
/**
 * Writes s to standard output via printf.  The text is first extracted
 * into a fixed 1000-byte buffer; the last byte is reserved for, and
 * always set to, the terminating NUL.
 */
void printUnicodeString(const UnicodeString &s) {
    enum { kBufSize = 1000 };
    char charBuf[kBufSize];

    // Leave the final byte out of the extraction so it can be forced to NUL.
    s.extract(0, s.length(), charBuf, sizeof(charBuf)-1, 0);
    charBuf[kBufSize - 1] = 0;

    printf("%s", charBuf);
}
/**
 * Appends c to buf only when buf is non-empty and its last character
 * differs from c; otherwise buf is left untouched.
 */
static void _smartAppend(UnicodeString& buf, UChar c) {
    const int32_t len = buf.length();
    if (len == 0) {
        return;     // never start a buffer with c
    }
    if (buf.charAt(len - 1) == c) {
        return;     // already ends in c
    }
    buf.append(c);
}
// Exercises the string helpers ::FmtLoadStr, FMTLOAD, ::Format, FORMAT,
// ::TrimLeft, ::TrimRight, ::CutToChar, ::LoadStr and ::LoadStrPart.
// Each scope below is an independent check of one call.
BOOST_FIXTURE_TEST_CASE(test11, base_fixture_t)
{
  // Tests for ::FmtLoadStr FMTLOAD ::Format ::LoadStr ::LoadStrPart ::CutToChar ::TrimLeft ::TrimRight
  {
    // Resource string formatted through the function form
    UnicodeString str = ::FmtLoadStr(CONST_TEST_STRING, L"lalala", 42);
    // BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
    // BOOST_TEST_MESSAGE("length = " << str.size());
    BOOST_CHECK(W2MB(str.c_str()) == "test string: \"lalala\" 42");
  }
  {
    // Same resource string formatted through the macro form
    UnicodeString str2 = FMTLOAD(CONST_TEST_STRING, L"lalala", 42);
    // BOOST_TEST_MESSAGE("str2 = " << W2MB(str2.c_str()));
    BOOST_CHECK(W2MB(str2.c_str()) == "test string: \"lalala\" 42");
  }
  {
    // Literal format string, function form
    UnicodeString str2 = ::Format(L"test: %s %d", L"lalala", 42);
    BOOST_TEST_MESSAGE("str2 = " << W2MB(str2.c_str()));
    BOOST_CHECK_EQUAL(0, wcscmp(str2.c_str(), L"test: lalala 42"));
  }
  {
    // Literal format string, macro form
    UnicodeString str3 = FORMAT(L"test: %s %d", L"lalala", 42);
    BOOST_TEST_MESSAGE("str3 = " << W2MB(str3.c_str()));
    BOOST_CHECK_EQUAL(0, wcscmp(str3.c_str(), L"test: lalala 42"));
  }
  {
    // TrimLeft: empty input
    UnicodeString str = ::TrimLeft(L"");
    BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
    BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L""));
  }
  {
    // TrimLeft: nothing to trim
    UnicodeString str = ::TrimLeft(L"1");
    BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
    BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L"1"));
  }
  {
    // TrimLeft: leading space removed
    UnicodeString str = ::TrimLeft(L" 1");
    BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
    BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L"1"));
  }
  {
    // TrimRight: empty input
    UnicodeString str = ::TrimRight(L"");
    BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
    BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L""));
  }
  {
    // TrimRight: nothing to trim
    UnicodeString str = ::TrimRight(L"1");
    BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
    BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L"1"));
  }
  {
    // TrimRight: trailing space removed
    UnicodeString str = ::TrimRight(L"1 ");
    BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
    BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L"1"));
  }
  {
    // UnicodeString CutToChar(UnicodeString &Str, char Ch, bool Trim)
    // Called twice on the same Str1: first without trimming, then with.
    UnicodeString Str1 = L" part 1 | part 2 ";
    UnicodeString str1 = ::CutToChar(Str1, '|', false);
    BOOST_TEST_MESSAGE("str1 = '" << W2MB(str1.c_str()) << "'");
    BOOST_TEST_MESSAGE("Str1 = '" << W2MB(Str1.c_str()) << "'");
    // BOOST_TEST_MESSAGE("Str1 = '" << W2MB(Str1.c_str()) << "'");
    // DEBUG_PRINTF(L"str1 = \"%s\"", str1.c_str());
    BOOST_CHECK_EQUAL(0, wcscmp(str1.c_str(), L" part 1 "));

    UnicodeString str2 = ::CutToChar(Str1, '|', true);
    BOOST_TEST_MESSAGE("str2 = '" << W2MB(str2.c_str()) << "'");
    BOOST_TEST_MESSAGE("Str1 = '" << W2MB(Str1.c_str()) << "'");
    BOOST_CHECK_EQUAL(0, wcscmp(str2.c_str(), L" part 2"));
  }
  {
    // LoadStr returns the raw, unformatted resource string
    UnicodeString str = ::LoadStr(CONST_TEST_STRING);
    BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
    BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L"test string: \"%s\" %d"));
  }
  {
    // LoadStrPart: first part of the resource string
    UnicodeString str = ::LoadStrPart(CONST_TEST_STRING2, 1);
    BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
    BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L"test string part 1"));
  }
  {
    // LoadStrPart: second part of the resource string
    UnicodeString str = ::LoadStrPart(CONST_TEST_STRING2, 2);
    BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
    BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L"part 2"));
  }
}
/**
 * Builds the hashtable that maps zone IDs (canonical IDs and their
 * aliases) to CanonicalMapEntry values.
 *
 * The table is filled in two passes:
 *  1. every table item under gZoneFormattingTag in the gSupplementalData
 *     bundle contributes its canonical ID and each of its aliases;
 *  2. every zone returned by TimeZone::createEnumeration() that is still
 *     missing is added, reusing the entry of an equivalent ID when one is
 *     already present and otherwise using the dereferenced Olson link.
 *
 * Keys are not owned by the hashtable (no key deleter is installed);
 * values are released through deleteCanonicalMapEntry.
 *
 * @return the new hashtable, or NULL if the table could not be opened,
 *         the resource data could not be loaded, an allocation failed,
 *         or an insertion failed.
 */
UHashtable*
ZoneMeta::createCanonicalMap(void) {
    UErrorCode status = U_ZERO_ERROR;

    UHashtable *canonicalMap = NULL;
    UResourceBundle *zoneFormatting = NULL;
    UResourceBundle *tzitem = NULL;
    UResourceBundle *aliases = NULL;

    StringEnumeration* tzenum = NULL;
    int32_t numZones;

    canonicalMap = uhash_open(uhash_hashUChars, uhash_compareUChars, NULL, &status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    // no key deleter
    uhash_setValueDeleter(canonicalMap, deleteCanonicalMapEntry);

    zoneFormatting = ures_openDirect(NULL, gSupplementalData, &status);
    zoneFormatting = ures_getByKey(zoneFormatting, gZoneFormattingTag, zoneFormatting, &status);
    if (U_FAILURE(status)) {
        goto error_cleanup;
    }

    while (ures_hasNext(zoneFormatting)) {
        tzitem = ures_getNextResource(zoneFormatting, tzitem, &status);
        if (U_FAILURE(status)) {
            // Skip an unreadable item and keep going
            status = U_ZERO_ERROR;
            continue;
        }
        if (ures_getType(tzitem) != URES_TABLE) {
            continue;
        }

        int32_t canonicalLen;
        const UChar *canonical = ures_getStringByKey(tzitem, gCanonicalTag, &canonicalLen, &status);
        if (U_FAILURE(status)) {
            status = U_ZERO_ERROR;
            continue;
        }

        int32_t territoryLen;
        const UChar *territory = ures_getStringByKey(tzitem, gTerritoryTag, &territoryLen, &status);
        if (U_FAILURE(status)) {
            // Territory is optional
            territory = NULL;
            status = U_ZERO_ERROR;
        }

        // Create canonical map entry
        CanonicalMapEntry *entry = (CanonicalMapEntry*)uprv_malloc(sizeof(CanonicalMapEntry));
        if (entry == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            goto error_cleanup;
        }
        entry->id = canonical;
        if (territory == NULL || u_strcmp(territory, gWorld) == 0) {
            entry->country = NULL;
        } else {
            entry->country = territory;
        }

        // Put this entry in the hashtable. Since this hashtable has no key deleter,
        // key is treated as const, but must be passed as non-const.
        uhash_put(canonicalMap, (UChar*)canonical, entry, &status);
        if (U_FAILURE(status)) {
            goto error_cleanup;
        }

        // Get aliases
        aliases = ures_getByKey(tzitem, gAliasesTag, aliases, &status);
        if (U_FAILURE(status)) {
            // No aliases
            status = U_ZERO_ERROR;
            continue;
        }

        while (ures_hasNext(aliases)) {
            const UChar* alias = ures_getNextString(aliases, NULL, NULL, &status);
            if (U_FAILURE(status)) {
                status = U_ZERO_ERROR;
                continue;
            }
            // Create canonical map entry for this alias
            entry = (CanonicalMapEntry*)uprv_malloc(sizeof(CanonicalMapEntry));
            if (entry == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                goto error_cleanup;
            }
            entry->id = canonical;
            if (territory == NULL || u_strcmp(territory, gWorld) == 0) {
                entry->country = NULL;
            } else {
                entry->country = territory;
            }

            // Put this entry in the hashtable. Since this hashtable has no key deleter,
            // key is treated as const, but must be passed as non-const.
            uhash_put(canonicalMap, (UChar*)alias, entry, &status);
            if (U_FAILURE(status)) {
                goto error_cleanup;
            }
        }
    }

    // Some available Olson zones are not included in CLDR data (such as Asia/Riyadh87).
    // Also, when we update Olson tzdata, new zones may be added.
    // This code scans all available zones in zoneinfo.res, and if any of them are
    // missing, add them to the map.
    tzenum = TimeZone::createEnumeration();
    if (tzenum == NULL) {
        // Never dereference a missing enumeration; handle it the same way
        // as the other allocation failures in this function.
        status = U_MEMORY_ALLOCATION_ERROR;
        goto error_cleanup;
    }
    numZones = tzenum->count(status);
    if (U_SUCCESS(status)) {
        int32_t i;
        for (i = 0; i < numZones; i++) {
            const UnicodeString *zone = tzenum->snext(status);
            if (U_FAILURE(status) || zone == NULL) {
                // We should not get here.
                status = U_ZERO_ERROR;
                continue;
            }
            UChar zoneUChars[ZID_KEY_MAX];
            int32_t zoneUCharsLen = zone->extract(zoneUChars, ZID_KEY_MAX, status) + 1; // Add one for NUL termination
            if (U_FAILURE(status) || status==U_STRING_NOT_TERMINATED_WARNING) {
                status = U_ZERO_ERROR;
                continue; // zone id is too long to extract
            }
            CanonicalMapEntry *entry = (CanonicalMapEntry*)uhash_get(canonicalMap, zoneUChars);
            if (entry) {
                // Already included in CLDR data
                continue;
            }
            // Not in CLDR data, but it could be new one whose alias is available
            // in CLDR.
            int32_t nTzdataEquivalent = TimeZone::countEquivalentIDs(*zone);
            int32_t j;
            for (j = 0; j < nTzdataEquivalent; j++) {
                UnicodeString alias = TimeZone::getEquivalentID(*zone, j);
                if (alias == *zone) {
                    continue;
                }
                UChar aliasUChars[ZID_KEY_MAX];
                alias.extract(aliasUChars, ZID_KEY_MAX, status);
                if (U_FAILURE(status) || status==U_STRING_NOT_TERMINATED_WARNING) {
                    status = U_ZERO_ERROR;
                    continue; // zone id is too long to extract
                }
                entry = (CanonicalMapEntry*)uhash_get(canonicalMap, aliasUChars);
                if (entry != NULL) {
                    break;
                }
            }
            // Create a new map entry
            CanonicalMapEntry* newEntry = (CanonicalMapEntry*)uprv_malloc(sizeof(CanonicalMapEntry));
            int32_t idLen;
            if (newEntry == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                goto error_cleanup;
            }
            if (entry == NULL) {
                // Set dereferenced zone ID as the canonical ID
                UnicodeString derefZone;
                TimeZone::dereferOlsonLink(*zone, derefZone);
                if (derefZone.length() == 0) {
                    // It should never happen.. but just in case
                    derefZone = *zone;
                }
                idLen = derefZone.length() + 1;
                newEntry->id = allocUStringInTable(idLen);
                if (newEntry->id == NULL) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    uprv_free(newEntry);
                    goto error_cleanup;
                }
                // Copy NULL terminated string
                derefZone.extract((UChar*)(newEntry->id), idLen, status);
                if (U_FAILURE(status)) {
                    removeLastUStringFromTable();
                    uprv_free(newEntry);
                    goto error_cleanup;
                }
                // No territory information available
                newEntry->country = NULL;
            } else {
                // Duplicate the entry
                newEntry->id = entry->id;
                newEntry->country = entry->country;
            }

            // Put this entry in the hashtable
            UChar *key = allocUStringInTable(zoneUCharsLen);
            if (key == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                deleteCanonicalMapEntry(newEntry);
                goto error_cleanup;
            }
            u_strncpy(key, zoneUChars, zoneUCharsLen);
            uhash_put(canonicalMap, key, newEntry, &status);
            if (U_FAILURE(status)) {
                goto error_cleanup;
            }
        }
    }

normal_cleanup:
    ures_close(aliases);
    ures_close(tzitem);
    ures_close(zoneFormatting);
    delete tzenum;
    return canonicalMap;

error_cleanup:
    // Discard the partially built table, then share the common cleanup.
    if (canonicalMap != NULL) {
        uhash_close(canonicalMap);
        canonicalMap = NULL;
    }
    goto normal_cleanup;
}
void DataDrivenFormatTest::testConvertDate(TestData *testData, const DataMap * /* settings */, UBool fmt) { UnicodeString kPATTERN("PATTERN="); // TODO: static UnicodeString kMILLIS("MILLIS="); // TODO: static UnicodeString kRELATIVE_MILLIS("RELATIVE_MILLIS="); // TODO: static UnicodeString kRELATIVE_ADD("RELATIVE_ADD:"); // TODO: static UErrorCode status = U_ZERO_ERROR; SimpleDateFormat basicFmt(UnicodeString("EEE MMM dd yyyy / YYYY'-W'ww-ee"), status); if (U_FAILURE(status)) { dataerrln("FAIL: Couldn't create basic SimpleDateFormat: %s", u_errorName(status)); return; } const DataMap *currentCase= NULL; // Start the processing int n = 0; while (testData->nextCase(currentCase, status)) { char calLoc[256] = ""; DateTimeStyleSet styleSet; UnicodeString pattern; UBool usePattern = FALSE; (void)usePattern; // Suppress unused warning. CalendarFieldsSet fromSet; UDate fromDate = 0; UBool useDate = FALSE; UDate now = Calendar::getNow(); ++n; char theCase[200]; sprintf(theCase, "case %d:", n); UnicodeString caseString(theCase, ""); // load params UnicodeString locale = currentCase->getString("locale", status); if (U_FAILURE(status)) { errln("case %d: No 'locale' line.", n); continue; } UnicodeString zone = currentCase->getString("zone", status); if (U_FAILURE(status)) { errln("case %d: No 'zone' line.", n); continue; } UnicodeString spec = currentCase->getString("spec", status); if(U_FAILURE(status)) { errln("case %d: No 'spec' line.", n); continue; } UnicodeString date = currentCase->getString("date", status); if(U_FAILURE(status)) { errln("case %d: No 'date' line.", n); continue; } UnicodeString expectStr= currentCase->getString("str", status); if(U_FAILURE(status)) { errln("case %d: No 'str' line.", n); continue; } DateFormat *format = NULL; // Process: 'locale' locale.extract(0, locale.length(), calLoc, (const char*)0); // default codepage. Invariant codepage doesn't have '@'! 
Locale loc(calLoc); if(spec.startsWith(kPATTERN)) { pattern = UnicodeString(spec,kPATTERN.length()); usePattern = TRUE; format = new SimpleDateFormat(pattern, loc, status); if(U_FAILURE(status)) { errln("case %d: could not create SimpleDateFormat from pattern: %s", n, u_errorName(status)); continue; } } else { if(styleSet.parseFrom(spec, status)<0 || U_FAILURE(status)) { errln("case %d: could not parse spec as style fields: %s", n, u_errorName(status)); continue; } format = DateFormat::createDateTimeInstance((DateFormat::EStyle)styleSet.getDateStyle(), (DateFormat::EStyle)styleSet.getTimeStyle(), loc); if(format == NULL ) { errln("case %d: could not create SimpleDateFormat from styles.", n); continue; } } Calendar *cal = Calendar::createInstance(loc, status); if(U_FAILURE(status)) { errln("case %d: could not create calendar from %s", n, calLoc); } if (zone.length() > 0) { TimeZone * tz = TimeZone::createTimeZone(zone); cal->setTimeZone(*tz); format->setTimeZone(*tz); delete tz; } // parse 'date' if(date.startsWith(kMILLIS)) { UnicodeString millis = UnicodeString(date, kMILLIS.length()); useDate = TRUE; fromDate = udbg_stod(millis); } else if(date.startsWith(kRELATIVE_MILLIS)) { UnicodeString millis = UnicodeString(date, kRELATIVE_MILLIS.length()); useDate = TRUE; fromDate = udbg_stod(millis) + now; } else if(date.startsWith(kRELATIVE_ADD)) { UnicodeString add = UnicodeString(date, kRELATIVE_ADD.length()); // "add" is a string indicating which fields to add if(fromSet.parseFrom(add, status)<0 || U_FAILURE(status)) { errln("case %d: could not parse date as RELATIVE_ADD calendar fields: %s", n, u_errorName(status)); continue; } useDate=TRUE; cal->clear(); cal->setTime(now, status); for (int q=0; q<UCAL_FIELD_COUNT; q++) { if (fromSet.isSet((UCalendarDateFields)q)) { //int32_t oldv = cal->get((UCalendarDateFields)q, status); if (q == UCAL_DATE) { cal->add((UCalendarDateFields)q, fromSet.get((UCalendarDateFields)q), status); } else { cal->set((UCalendarDateFields)q, 
fromSet.get((UCalendarDateFields)q)); } //int32_t newv = cal->get((UCalendarDateFields)q, status); } } fromDate = cal->getTime(status); if(U_FAILURE(status)) { errln("case %d: could not apply date as RELATIVE_ADD calendar fields: %s", n, u_errorName(status)); continue; } } else if(fromSet.parseFrom(date, status)<0 || U_FAILURE(status)) { errln("case %d: could not parse date as calendar fields: %s", n, u_errorName(status)); continue; } // now, do it. if (fmt) { FieldPosition pos; // logln((UnicodeString)"#"+n+" "+locale+"/"+from+" >>> "+toCalLoc+"/" // +to); cal->clear(); UnicodeString output; output.remove(); if(useDate) { // cal->setTime(fromDate, status); // if(U_FAILURE(status)) { // errln("case %d: could not set time on calendar: %s", n, u_errorName(status)); // continue; // } format->format(fromDate, output, pos, status); } else { fromSet.setOnCalendar(cal, status); if(U_FAILURE(status)) { errln("case %d: could not set fields on calendar: %s", n, u_errorName(status)); continue; } format->format(*cal, output, pos); } // check erro result from 'format' if(U_FAILURE(status)) { errln("case %d: could not format(): %s", n, u_errorName(status)); // TODO: use 'pos' } // if(pos.getBeginIndex()==0 && pos.getEndIndex()==0) { // TODO: more precise error? // errln("WARNING: case %d: format's pos returned (0,0) - error ??", n); // } if(output == expectStr) { logln(caseString+": format: SUCCESS! "+UnicodeString("expect=output=")+output); } else { UnicodeString result; UnicodeString result2; errln(caseString+": format: output!=expectStr, got " + *udbg_escape(output, &result) + " expected " + *udbg_escape(expectStr, &result2)); } } else { cal->clear(); ParsePosition pos; format->parse(expectStr,*cal,pos); if(useDate) { UDate gotDate = cal->getTime(status); if(U_FAILURE(status)) { errln(caseString+": parse: could not get time on calendar: "+UnicodeString(u_errorName(status))); continue; } if(gotDate == fromDate) { logln(caseString+": parse: SUCCESS! 
"+UnicodeString("gotDate=parseDate=")+expectStr); } else { UnicodeString expectDateStr, gotDateStr; basicFmt.format(fromDate,expectDateStr); basicFmt.format(gotDate,gotDateStr); errln(caseString+": parse: FAIL. parsed '"+expectStr+"' and got "+gotDateStr+", expected " + expectDateStr); } } else { // Calendar *cal2 = cal->clone(); // cal2->clear(); // fromSet.setOnCalendar(cal2, status); if(U_FAILURE(status)) { errln("case %d: parse: could not set fields on calendar: %s", n, u_errorName(status)); continue; } CalendarFieldsSet diffSet; // diffSet.clear(); if (!fromSet.matches(cal, diffSet, status)) { UnicodeString diffs = diffSet.diffFrom(fromSet, status); errln((UnicodeString)"FAIL: "+caseString +", Differences: '"+ diffs +"', status: "+ u_errorName(status)); } else if (U_FAILURE(status)) { errln("FAIL: "+caseString+" parse SET SOURCE calendar Failed to match: " +u_errorName(status)); } else { logln("PASS: "******" parse."); } } } delete cal; delete format; } // delete basicFmt; }
void QuantityFormatterTest::TestBasic() { UErrorCode status = U_ZERO_ERROR; #if !UCONFIG_NO_FORMATTING QuantityFormatter fmt; assertFalse( "adding bad variant", fmt.addIfAbsent("a bad variant", "{0} pounds", status)); assertEquals("adding bad variant status", (int32_t)U_ILLEGAL_ARGUMENT_ERROR, status); status = U_ZERO_ERROR; assertFalse( "Adding bad pattern", fmt.addIfAbsent("other", "{0} {1} too many placeholders", status)); assertEquals("adding bad pattern status", (int32_t)U_ILLEGAL_ARGUMENT_ERROR, status); status = U_ZERO_ERROR; assertFalse("isValid with no patterns", fmt.isValid()); assertTrue( "Adding good pattern with no placeholders", fmt.addIfAbsent("zero", "no placeholder", status)); assertTrue( "Adding good pattern", fmt.addIfAbsent("other", "{0} pounds", status)); assertTrue("isValid with other", fmt.isValid()); assertTrue( "Adding good pattern", fmt.addIfAbsent("one", "{0} pound", status)); assertEquals( "getByVariant", fmt.getByVariant("bad variant")->getTextWithNoArguments(), " pounds"); assertEquals( "getByVariant", fmt.getByVariant("other")->getTextWithNoArguments(), " pounds"); assertEquals( "getByVariant", fmt.getByVariant("one")->getTextWithNoArguments(), " pound"); assertEquals( "getByVariant", fmt.getByVariant("few")->getTextWithNoArguments(), " pounds"); // Test copy constructor { QuantityFormatter copied(fmt); assertEquals( "copied getByVariant", copied.getByVariant("other")->getTextWithNoArguments(), " pounds"); assertEquals( "copied getByVariant", copied.getByVariant("one")->getTextWithNoArguments(), " pound"); assertEquals( "copied getByVariant", copied.getByVariant("few")->getTextWithNoArguments(), " pounds"); } // Test assignment { QuantityFormatter assigned; assigned = fmt; assertEquals( "assigned getByVariant", assigned.getByVariant("other")->getTextWithNoArguments(), " pounds"); assertEquals( "assigned getByVariant", assigned.getByVariant("one")->getTextWithNoArguments(), " pound"); assertEquals( "assigned getByVariant", 
assigned.getByVariant("few")->getTextWithNoArguments(), " pounds"); } // Test format. { LocalPointer<NumberFormat> numfmt( NumberFormat::createInstance(Locale::getEnglish(), status)); LocalPointer<PluralRules> plurrule( PluralRules::forLocale("en", status)); FieldPosition pos(FieldPosition::DONT_CARE); UnicodeString appendTo; assertEquals( "format singular", UnicodeString("1 pound"), fmt.format( 1.0, *numfmt, *plurrule, appendTo, pos, status), TRUE); appendTo.remove(); assertEquals( "format plural", UnicodeString("2 pounds"), fmt.format( 2.0, *numfmt, *plurrule, appendTo, pos, status), TRUE); } fmt.reset(); assertFalse("isValid after reset", fmt.isValid()); #endif assertSuccess("", status); }
int main(int argc, const char *argv[]) { UErrorCode errorCode = U_ZERO_ERROR; // Get the unsafeBackwardsSet const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode)); return 1; } const UVersionInfo &version = rootEntry->tailoring->version; const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet; char verString[20]; u_versionToString(version, verString); fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString); int32_t rangeCount = unsafeBackwardSet->getRangeCount(); #if SERIALIZE fprintf(stderr, ".. serializing\n"); // UnicodeSet serialization UErrorCode preflightCode = U_ZERO_ERROR; // preflight int32_t serializedCount = unsafeBackwardSet->serialize(NULL,0,preflightCode); if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) { fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode)); return 1; } uint16_t *serializedData = new uint16_t[serializedCount]; // serialize unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode); if(U_FAILURE(errorCode)) { delete [] serializedData; fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode)); return 1; } #endif #if PATTERN fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n"); // attempt to use pattern UnicodeString pattern; UnicodeSet set(*unsafeBackwardSet); set.compact(); set.toPattern(pattern, FALSE); if(U_SUCCESS(errorCode)) { // This fails (bug# ?) - which is why this method was abandoned. 
// UnicodeSet usA(pattern, errorCode); // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode)); // return 1; } const UChar *buf = pattern.getBuffer(); int32_t needed = pattern.length(); // print { char buf2[2048]; int32_t len2 = pattern.extract(0, pattern.length(), buf2, "utf-8"); buf2[len2]=0; fprintf(stderr,"===\n%s\n===\n", buf2); } const UnicodeString unsafeBackwardPattern(FALSE, buf, needed); if(U_SUCCESS(errorCode)) { //UnicodeSet us(unsafeBackwardPattern, errorCode); // fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode)); } else { fprintf(stderr, "Uset OK - \n"); } #endif // Generate the output file. printf("// collunsafe.h\n"); printf("// %s\n", U_COPYRIGHT_STRING); printf("\n"); printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n"); printf("// Machine generated, do not edit.\n"); printf("\n"); printf("#ifndef COLLUNSAFE_H\n" "#define COLLUNSAFE_H\n" "\n" "#include \"unicode/utypes.h\"\n" "\n" "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n"); printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString); #if PATTERN printf("#define COLLUNSAFE_PATTERN 1\n"); printf("static const int32_t collunsafe_len = %d;\n", needed); printf("static const UChar collunsafe_pattern[collunsafe_len] = {\n"); for(int i=0;i<needed;i++) { if( (i>0) && (i%8 == 0) ) { printf(" // %d\n", i); } printf("0x%04X", buf[i]); // TODO check if(i != (needed-1)) { printf(", "); } } printf(" //%d\n};\n", (needed-1)); #endif #if RANGE fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n"); printf("#define COLLUNSAFE_RANGE 1\n"); printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount); printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2); for(int32_t i=0;i<rangeCount;i++) { printf(" 0x%04X, 0x%04X, // %d\n", unsafeBackwardSet->getRangeStart(i), unsafeBackwardSet->getRangeEnd(i), i); } printf("};\n"); 
#endif #if SERIALIZE printf("#define COLLUNSAFE_SERIALIZE 1\n"); printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount); printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount); for(int32_t i=0;i<serializedCount;i++) { if( (i>0) && (i%8 == 0) ) { printf(" // %d\n", i); } printf("0x%04X", serializedData[i]); // TODO check if(i != (serializedCount-1)) { printf(", "); } } printf("};\n"); #endif printf("#endif\n"); fflush(stderr); fflush(stdout); return(U_SUCCESS(errorCode)?0:1); }
std::string GlobalizationNDK::numberToString(const std::string& args) { if (args.empty()) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::numberToString: no arguments provided!"); return errorInJson(UNKNOWN_ERROR, "No arguments provided!"); } Json::Reader reader; Json::Value root; bool parse = reader.parse(args, root); if (!parse) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::numberToString: invalid json data: %s", args.c_str()); return errorInJson(PARSING_ERROR, "Invalid json data!"); } Json::Value nv = root["number"]; if (nv.isNull()) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::numberToString: no number provided!"); return errorInJson(FORMATTING_ERROR, "No number provided!"); } if (!nv.isNumeric()) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::numberToString: invalid number type: %d!", nv.type()); return errorInJson(FORMATTING_ERROR, "Invalid number type!"); } // This is the default value when no options provided. ENumberType type = kNumberDecimal; Json::Value options = root["options"]; std::string error; if (!handleNumberOptions(options, type, error)) return errorInJson(PARSING_ERROR, error); UErrorCode status = U_ZERO_ERROR; NumberFormat* nf; switch (type) { case kNumberDecimal: default: nf = NumberFormat::createInstance(status); break; case kNumberCurrency: nf = NumberFormat::createCurrencyInstance(status); break; case kNumberPercent: nf = NumberFormat::createPercentInstance(status); break; } if (!nf) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::numberToString: failed to create NumberFormat instance for type %d: %d", status, type); return errorInJson(UNKNOWN_ERROR, "Failed to create NumberFormat instance!"); } std::auto_ptr<NumberFormat> deleter(nf); UnicodeString result; nf->format(nv.asDouble(), result); std::string utf8; result.toUTF8String(utf8); return resultInJson(utf8); }
//------------------------------------------------------------------------------ // // findSetFor given a UnicodeString, // - find the corresponding Unicode Set (uset node) // (create one if necessary) // - Set fLeftChild of the caller's node (should be a setRef node) // to the uset node // Maintain a hash table of uset nodes, so the same one is always used // for the same string. // If a "to adopt" set is provided and we haven't seen this key before, // add the provided set to the hash table. // If the string is one (32 bit) char in length, the set contains // just one element which is the char in question. // If the string is "any", return a set containing all chars. // //------------------------------------------------------------------------------ void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt) { RBBISetTableEl *el; // First check whether we've already cached a set for this string. // If so, just use the cached set in the new node. // delete any set provided by the caller, since we own it. el = (RBBISetTableEl *)uhash_get(fSetTable, &s); if (el != NULL) { delete setToAdopt; node->fLeftChild = el->val; U_ASSERT(node->fLeftChild->fType == RBBINode::uset); return; } // Haven't seen this set before. // If the caller didn't provide us with a prebuilt set, // create a new UnicodeSet now. if (setToAdopt == NULL) { if (s.compare(kAny, -1) == 0) { setToAdopt = new UnicodeSet(0x000000, 0x10ffff); } else { UChar32 c; c = s.char32At(0); setToAdopt = new UnicodeSet(c, c); } } // // Make a new uset node to refer to this UnicodeSet // This new uset node becomes the child of the caller's setReference node. // RBBINode *usetNode = new RBBINode(RBBINode::uset); if (usetNode == NULL) { error(U_MEMORY_ALLOCATION_ERROR); return; } usetNode->fInputSet = setToAdopt; usetNode->fParent = node; node->fLeftChild = usetNode; usetNode->fText = s; // // Add the new uset node to the list of all uset nodes. 
// fRB->fUSetNodes->addElement(usetNode, *fRB->fStatus); // // Add the new set to the set hash table. // el = (RBBISetTableEl *)uprv_malloc(sizeof(RBBISetTableEl)); UnicodeString *tkey = new UnicodeString(s); if (tkey == NULL || el == NULL || setToAdopt == NULL) { // Delete to avoid memory leak delete tkey; tkey = NULL; uprv_free(el); el = NULL; delete setToAdopt; setToAdopt = NULL; error(U_MEMORY_ALLOCATION_ERROR); return; } el->key = tkey; el->val = usetNode; uhash_put(fSetTable, el->key, el, fRB->fStatus); return; }
std::string GlobalizationNDK::getNumberPattern(const std::string& args) { // This is the default value when no options provided. ENumberType type = kNumberDecimal; if (!args.empty()) { Json::Reader reader; Json::Value root; bool parse = reader.parse(args, root); if (!parse) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getNumberPattern: invalid json data: %s", args.c_str()); return errorInJson(PARSING_ERROR, "Invalid json data!"); } Json::Value options = root["options"]; std::string error; if (!handleNumberOptions(options, type, error)) return errorInJson(PARSING_ERROR, error); } std::string pattern, symbol, positive, negative, decimal, grouping; int fraction; double rounding; UErrorCode status = U_ZERO_ERROR; NumberFormat* nf; switch (type) { case kNumberDecimal: default: nf = NumberFormat::createInstance(status); break; case kNumberCurrency: nf = NumberFormat::createCurrencyInstance(status); break; case kNumberPercent: nf = NumberFormat::createPercentInstance(status); break; } if (!nf) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getNumberPattern: failed to create NumberFormat instance for type %d: %d", status, type); return errorInJson(UNKNOWN_ERROR, "Failed to create NumberFormat instance!"); } std::auto_ptr<NumberFormat> deleter(nf); if (nf->getDynamicClassID() != DecimalFormat::getStaticClassID()) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getNumberPattern: DecimalFormat expected: %p != %p", nf->getDynamicClassID(), DecimalFormat::getStaticClassID()); return errorInJson(UNKNOWN_ERROR, "DecimalFormat expected!"); } DecimalFormat* df = (DecimalFormat*) nf; const DecimalFormatSymbols* dfs = df->getDecimalFormatSymbols(); if (!dfs) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getNumberPattern: unable to get DecimalFormatSymbols!"); return errorInJson(UNKNOWN_ERROR, "Failed to get DecimalFormatSymbols instance!"); } UnicodeString ucs; df->toPattern(ucs); ucs.toUTF8String(pattern); ucs.remove(); df->getPositivePrefix(ucs); if 
(ucs.isEmpty()) df->getPositiveSuffix(ucs); ucs.toUTF8String(positive); ucs.remove(); df->getNegativePrefix(ucs); if (ucs.isEmpty()) df->getNegativeSuffix(ucs); ucs.toUTF8String(negative); ucs.remove(); rounding = df->getRoundingIncrement(); fraction = df->getMaximumFractionDigits(); ucs = dfs->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol); ucs.toUTF8String(decimal); ucs.remove(); ucs = dfs->getSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol); ucs.toUTF8String(grouping); ucs.remove(); if (type == kNumberPercent) ucs = dfs->getSymbol(DecimalFormatSymbols::kPercentSymbol); else if (type == kNumberCurrency) ucs = dfs->getSymbol(DecimalFormatSymbols::kCurrencySymbol); else ucs = dfs->getSymbol(DecimalFormatSymbols::kDigitSymbol); ucs.toUTF8String(symbol); ucs.remove(); return resultInJson(pattern, symbol, fraction, rounding, positive, negative, decimal, grouping); }
/* {{{ timezone_convert_to_datetimezone
 * Convert from TimeZone to DateTimeZone object.
 *
 * On success the DateTimeZone is built in *ret and ret is returned. On
 * failure the error is recorded in outside_error (prefixed with func), *ret
 * is destroyed and NULL is returned. */
U_CFUNC zval *timezone_convert_to_datetimezone(const TimeZone *timeZone,
											   intl_error *outside_error,
											   const char *func, zval *ret)
{
	UnicodeString	tzId;
	char			*errText = NULL;
	bool			failed   = false;

	timeZone->getID(tzId);
	if (tzId.isBogus()) {
		spprintf(&errText, 0, "%s: could not obtain TimeZone id", func);
		intl_errors_set(outside_error, U_ILLEGAL_ARGUMENT_ERROR,
			errText, 1);
		failed = true;
	} else {
		object_init_ex(ret, php_date_get_timezone_ce());
		php_timezone_obj *tzobj = Z_PHPTIMEZONE_P(ret);

		const UnicodeString gmtPrefix("GMT", sizeof("GMT")-1, US_INV);
		if (tzId.compare(0, 3, gmtPrefix) == 0) {
			/* The DateTimeZone constructor doesn't support offset time zones,
			 * so we must mess with DateTimeZone structure ourselves */
			tzobj->initialized	  = 1;
			tzobj->type			  = TIMELIB_ZONETYPE_OFFSET;
			/* convert offset from milliseconds to seconds */
			tzobj->tzi.utc_offset = timeZone->getRawOffset() / 1000;
		} else {
			/* Call the constructor with the id converted to UTF-8 */
			zend_string *u8str = intl_charFromString(tzId, &INTL_ERROR_CODE(*outside_error));
			if (!u8str) {
				spprintf(&errText, 0, "%s: could not convert id to UTF-8", func);
				intl_errors_set(outside_error, INTL_ERROR_CODE(*outside_error),
					errText, 1);
				failed = true;
			} else {
				zval ctorArg;
				ZVAL_STR(&ctorArg, u8str);
				zend_call_method_with_1_params(Z_OBJ_P(ret), NULL, &Z_OBJCE_P(ret)->constructor, "__construct", NULL, &ctorArg);
				if (EG(exception)) {
					spprintf(&errText, 0,
						"%s: DateTimeZone constructor threw exception", func);
					intl_errors_set(outside_error, U_ILLEGAL_ARGUMENT_ERROR,
						errText, 1);
					zend_object_store_ctor_failed(Z_OBJ_P(ret));
					failed = true;
				}
				/* the argument is released whether or not the call threw */
				zval_ptr_dtor(&ctorArg);
			}
		}
	}

	if (failed) {
		/* common failure cleanup: drop the partially built result */
		if (ret) {
			zval_ptr_dtor(ret);
		}
		ret = NULL;
	}

	if (errText) {
		efree(errText);
	}
	return ret;
}
/**
 * Tells whether the two code units starting at pos are a BACKSLASH followed
 * by UPPER_N.
 *
 * @param pattern the text to inspect
 * @param pos     index of the first code unit to test
 */
static inline UBool isNameOpen(const UnicodeString &pattern, int32_t pos) {
    const UBool startsWithBackslash = pattern.charAt(pos) == BACKSLASH;
    // The second code unit is only examined when the first one matched.
    return startsWithBackslash && pattern.charAt(pos + 1) == UPPER_N;
}
void CalendarLimitTest::doLimitsTest(Calendar& cal, const int32_t* fieldsToTest, UDate startDate, int32_t testDuration) { static const int32_t FIELDS[] = { UCAL_ERA, UCAL_YEAR, UCAL_MONTH, UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DAY_OF_MONTH, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_YEAR_WOY, UCAL_EXTENDED_YEAR, -1, }; static const char* FIELD_NAME[] = { "ERA", "YEAR", "MONTH", "WEEK_OF_YEAR", "WEEK_OF_MONTH", "DAY_OF_MONTH", "DAY_OF_YEAR", "DAY_OF_WEEK", "DAY_OF_WEEK_IN_MONTH", "AM_PM", "HOUR", "HOUR_OF_DAY", "MINUTE", "SECOND", "MILLISECOND", "ZONE_OFFSET", "DST_OFFSET", "YEAR_WOY", "DOW_LOCAL", "EXTENDED_YEAR", "JULIAN_DAY", "MILLISECONDS_IN_DAY", "IS_LEAP_MONTH" }; UErrorCode status = U_ZERO_ERROR; int32_t i, j; UnicodeString ymd; GregorianCalendar greg(status); if (failure(status, "new GregorianCalendar")) { return; } greg.setTime(startDate, status); if (failure(status, "GregorianCalendar::setTime")) { return; } logln((UnicodeString)"Start: " + startDate); if (fieldsToTest == NULL) { fieldsToTest = FIELDS; } // Keep a record of minima and maxima that we actually see. // These are kept in an array of arrays of hashes. int32_t limits[UCAL_FIELD_COUNT][4]; for (j = 0; j < UCAL_FIELD_COUNT; j++) { limits[j][0] = INT32_MAX; limits[j][1] = INT32_MIN; limits[j][2] = INT32_MAX; limits[j][3] = INT32_MIN; } // This test can run for a long time; show progress. UDate millis = ucal_getNow(); UDate mark = millis + 5000; // 5 sec millis -= testDuration * 1000; // stop time if testDuration<0 for (i = 0; testDuration > 0 ? 
i < testDuration : ucal_getNow() < millis; ++i) { if (ucal_getNow() >= mark) { logln((UnicodeString)"(" + i + " days)"); mark += 5000; // 5 sec } cal.setTime(greg.getTime(status), status); cal.setMinimalDaysInFirstWeek(1); if (failure(status, "Calendar set/getTime")) { return; } for (j = 0; fieldsToTest[j] >= 0; ++j) { UCalendarDateFields f = (UCalendarDateFields)fieldsToTest[j]; int32_t v = cal.get(f, status); int32_t minActual = cal.getActualMinimum(f, status); int32_t maxActual = cal.getActualMaximum(f, status); int32_t minLow = cal.getMinimum(f); int32_t minHigh = cal.getGreatestMinimum(f); int32_t maxLow = cal.getLeastMaximum(f); int32_t maxHigh = cal.getMaximum(f); if (limits[j][0] > minActual) { // the minimum limits[j][0] = minActual; } if (limits[j][1] < minActual) { // the greatest minimum limits[j][1] = minActual; } if (limits[j][2] > maxActual) { // the least maximum limits[j][2] = maxActual; } if (limits[j][3] < maxActual) { // the maximum limits[j][3] = maxActual; } if (minActual < minLow || minActual > minHigh) { errln((UnicodeString)"Fail: [" + cal.getType() + "] " + ymdToString(cal, ymd) + " Range for min of " + FIELD_NAME[f] + "(" + f + ")=" + minLow + ".." + minHigh + ", actual_min=" + minActual); } if (maxActual < maxLow || maxActual > maxHigh) { errln((UnicodeString)"Fail: [" + cal.getType() + "] " + ymdToString(cal, ymd) + " Range for max of " + FIELD_NAME[f] + "(" + f + ")=" + maxLow + ".." + maxHigh + ", actual_max=" + maxActual); } if (v < minActual || v > maxActual) { errln((UnicodeString)"Fail: [" + cal.getType() + "] " + ymdToString(cal, ymd) + " " + FIELD_NAME[f] + "(" + f + ")=" + v + ", actual range=" + minActual + ".." + maxActual + ", allowed=(" + minLow + ".." + minHigh + ")..(" + maxLow + ".." + maxHigh + ")"); } } greg.add(UCAL_DAY_OF_YEAR, 1, status); if (failure(status, "Calendar::add")) { return; } } // Check actual maxima and minima seen against ranges returned // by API. 
UnicodeString buf; for (j = 0; fieldsToTest[j] >= 0; ++j) { int32_t rangeLow, rangeHigh; UBool fullRangeSeen = TRUE; UCalendarDateFields f = (UCalendarDateFields)fieldsToTest[j]; buf.remove(); buf.append((UnicodeString)"[" + cal.getType() + "] " + FIELD_NAME[f]); // Minumum rangeLow = cal.getMinimum(f); rangeHigh = cal.getGreatestMinimum(f); if (limits[j][0] != rangeLow || limits[j][1] != rangeHigh) { fullRangeSeen = FALSE; } buf.append((UnicodeString)" minima range=" + rangeLow + ".." + rangeHigh); buf.append((UnicodeString)" minima actual=" + limits[j][0] + ".." + limits[j][1]); // Maximum rangeLow = cal.getLeastMaximum(f); rangeHigh = cal.getMaximum(f); if (limits[j][2] != rangeLow || limits[j][3] != rangeHigh) { fullRangeSeen = FALSE; } buf.append((UnicodeString)" maxima range=" + rangeLow + ".." + rangeHigh); buf.append((UnicodeString)" maxima actual=" + limits[j][2] + ".." + limits[j][3]); if (fullRangeSeen) { logln((UnicodeString)"OK: " + buf); } else { // This may or may not be an error -- if the range of dates // we scan over doesn't happen to contain a minimum or // maximum, it doesn't mean some other range won't. logln((UnicodeString)"Warning: " + buf); } } logln((UnicodeString)"End: " + greg.getTime(status)); }
/**
 * Return true if the given position, in the given pattern, appears
 * to be the start of a UnicodeSet pattern: either a '[' that is not the
 * last code unit of the pattern, or something accepted by
 * resemblesPropertyPattern().
 */
UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) {
    const UBool opensBracketSet =
        (pos + 1) < pattern.length() &&
        pattern.charAt(pos) == (UChar)91 /*[*/;
    // The property-pattern check is only consulted when no '[' was found.
    return opensBracketSet || resemblesPropertyPattern(pattern, pos);
}
int main(int argc, char **argv) { Calendar *cal; TimeZone *zone; DateFormat *fmt; UErrorCode status = U_ZERO_ERROR; UnicodeString str; UDate date; // The languages in which we will display the date static char* LANGUAGE[] = { "en", "de", "fr" }; static const int32_t N_LANGUAGE = sizeof(LANGUAGE)/sizeof(LANGUAGE[0]); // The time zones in which we will display the time static char* TIMEZONE[] = { "America/Los_Angeles", "America/New_York", "Europe/Paris", "Europe/Berlin" }; static const int32_t N_TIMEZONE = sizeof(TIMEZONE)/sizeof(TIMEZONE[0]); // Create a calendar cal = Calendar::createInstance(status); check(status, "Calendar::createInstance"); zone = createZone("GMT"); // Create a GMT zone cal->adoptTimeZone(zone); cal->clear(); cal->set(1999, Calendar::JUNE, 4); date = cal->getTime(status); check(status, "Calendar::getTime"); for (int32_t i=0; i<N_LANGUAGE; ++i) { Locale loc(LANGUAGE[i]); // Create a formatter for DATE and TIME fmt = DateFormat::createDateTimeInstance( DateFormat::kFull, DateFormat::kFull, loc); for (int32_t j=0; j<N_TIMEZONE; ++j) { cal->adoptTimeZone(createZone(TIMEZONE[j])); fmt->setCalendar(*cal); // Format the date str.remove(); fmt->format(date, str, status); // Display the formatted date string printf("Date (%s, %s): ", LANGUAGE[i], TIMEZONE[j]); uprintf(escape(str)); printf("\n\n"); } delete fmt; } printf("Exiting successfully\n"); return 0; }
void CurrencyPluralInfo::setupCurrencyPluralPattern(const Locale& loc, UErrorCode& status) { if (U_FAILURE(status)) { return; } if (fPluralCountToCurrencyUnitPattern) { deleteHash(fPluralCountToCurrencyUnitPattern); } fPluralCountToCurrencyUnitPattern = initHash(status); if (U_FAILURE(status)) { return; } NumberingSystem *ns = NumberingSystem::createInstance(loc,status); UErrorCode ec = U_ZERO_ERROR; UResourceBundle *rb = ures_open(NULL, loc.getName(), &ec); UResourceBundle *numElements = ures_getByKeyWithFallback(rb, gNumberElementsTag, NULL, &ec); rb = ures_getByKeyWithFallback(numElements, ns->getName(), rb, &ec); rb = ures_getByKeyWithFallback(rb, gPatternsTag, rb, &ec); int32_t ptnLen; const UChar* numberStylePattern = ures_getStringByKeyWithFallback(rb, gDecimalFormatTag, &ptnLen, &ec); // Fall back to "latn" if num sys specific pattern isn't there. if ( ec == U_MISSING_RESOURCE_ERROR && uprv_strcmp(ns->getName(),gLatnTag)) { ec = U_ZERO_ERROR; rb = ures_getByKeyWithFallback(numElements, gLatnTag, rb, &ec); rb = ures_getByKeyWithFallback(rb, gPatternsTag, rb, &ec); numberStylePattern = ures_getStringByKeyWithFallback(rb, gDecimalFormatTag, &ptnLen, &ec); } int32_t numberStylePatternLen = ptnLen; const UChar* negNumberStylePattern = NULL; int32_t negNumberStylePatternLen = 0; // TODO: Java // parse to check whether there is ";" separator in the numberStylePattern UBool hasSeparator = false; if (U_SUCCESS(ec)) { for (int32_t styleCharIndex = 0; styleCharIndex < ptnLen; ++styleCharIndex) { if (numberStylePattern[styleCharIndex] == gNumberPatternSeparator) { hasSeparator = true; // split the number style pattern into positive and negative negNumberStylePattern = numberStylePattern + styleCharIndex + 1; negNumberStylePatternLen = ptnLen - styleCharIndex - 1; numberStylePatternLen = styleCharIndex; } } } ures_close(numElements); ures_close(rb); delete ns; if (U_FAILURE(ec)) { return; } UResourceBundle *currRb = ures_open(U_ICUDATA_CURR, loc.getName(), &ec); 
UResourceBundle *currencyRes = ures_getByKeyWithFallback(currRb, gCurrUnitPtnTag, NULL, &ec); #ifdef CURRENCY_PLURAL_INFO_DEBUG std::cout << "in set up\n"; #endif StringEnumeration* keywords = fPluralRules->getKeywords(ec); if (U_SUCCESS(ec)) { const char* pluralCount; while ((pluralCount = keywords->next(NULL, ec)) != NULL) { if ( U_SUCCESS(ec) ) { int32_t ptnLen; UErrorCode err = U_ZERO_ERROR; const UChar* patternChars = ures_getStringByKeyWithFallback( currencyRes, pluralCount, &ptnLen, &err); if (U_SUCCESS(err) && ptnLen > 0) { UnicodeString* pattern = new UnicodeString(patternChars, ptnLen); #ifdef CURRENCY_PLURAL_INFO_DEBUG char result_1[1000]; pattern->extract(0, pattern->length(), result_1, "UTF-8"); std::cout << "pluralCount: " << pluralCount << "; pattern: " << result_1 << "\n"; #endif pattern->findAndReplace(UnicodeString(TRUE, gPart0, 3), UnicodeString(numberStylePattern, numberStylePatternLen)); pattern->findAndReplace(UnicodeString(TRUE, gPart1, 3), UnicodeString(TRUE, gTripleCurrencySign, 3)); if (hasSeparator) { UnicodeString negPattern(patternChars, ptnLen); negPattern.findAndReplace(UnicodeString(TRUE, gPart0, 3), UnicodeString(negNumberStylePattern, negNumberStylePatternLen)); negPattern.findAndReplace(UnicodeString(TRUE, gPart1, 3), UnicodeString(TRUE, gTripleCurrencySign, 3)); pattern->append(gNumberPatternSeparator); pattern->append(negPattern); } #ifdef CURRENCY_PLURAL_INFO_DEBUG pattern->extract(0, pattern->length(), result_1, "UTF-8"); std::cout << "pluralCount: " << pluralCount << "; pattern: " << result_1 << "\n"; #endif fPluralCountToCurrencyUnitPattern->put(UnicodeString(pluralCount, -1, US_INV), pattern, status); } } } } delete keywords; ures_close(currencyRes); ures_close(currRb); }
static void appendRange( const UnicodeString &src, int32_t end, UnicodeString &dest) { dest.append(src, end, src.length() - end); }
/**
 * Builds a substitution from its textual description.
 *
 * The description is expected to begin and end with the same token
 * character; those two characters are stripped and what remains decides how
 * the substitution formats its value:
 *   - nothing left:            use the owning rule set (_ruleSet)
 *   - starts with gPercent:    use the rule set the formatter finds by name
 *   - starts with gPound or gZero: use a DecimalFormat built from the pattern
 *   - starts with gGreaterThan: use the owning rule set (the ">>>" form)
 *   - anything else:           status is set to U_PARSE_ERROR
 *
 * @param _pos        position stored in the `pos` member
 * @param _ruleSet    rule set that owns this substitution
 * @param formatter   formatter used to look up rule sets and symbols
 * @param description substitution text, including the token characters
 * @param status      set to U_PARSE_ERROR, U_MISSING_RESOURCE_ERROR or
 *                    U_MEMORY_ALLOCATION_ERROR on failure
 */
NFSubstitution::NFSubstitution(int32_t _pos,
                               const NFRuleSet* _ruleSet,
                               const RuleBasedNumberFormat* formatter,
                               const UnicodeString& description,
                               UErrorCode& status)
    : pos(_pos), ruleSet(NULL), numberFormat(NULL)
{
    // The description should begin and end with the same character; if it
    // doesn't, that is a syntax error. Only makeSubstitution() needed those
    // characters, so they are stripped here.
    const int32_t descLen = description.length();
    UnicodeString body(description);
    if (descLen >= 2 && description.charAt(0) == description.charAt(descLen - 1)) {
        body.remove(descLen - 1, 1);
        body.remove(0, 1);
    } else if (descLen != 0) {
        // throw new IllegalArgumentException("Illegal substitution syntax");
        status = U_PARSE_ERROR;
        return;
    }

    // Just two paired token characters (i.e., "<<" or ">>"): format with the
    // rule set this substitution belongs to.
    if (body.length() == 0) {
        this->ruleSet = _ruleSet;
        return;
    }

    const UChar lead = body.charAt(0);

    // A rule set name: look that rule set up and format with it.
    if (lead == gPercent) {
        this->ruleSet = formatter->findRuleSet(body, status);
        return;
    }

    // Begins with 0 or #: treat the text as a DecimalFormat pattern, using
    // the DecimalFormatSymbols belonging to our formatter.
    if (lead == gPound || lead == gZero) {
        DecimalFormatSymbols* sym = formatter->getDecimalFormatSymbols();
        if (!sym) {
            status = U_MISSING_RESOURCE_ERROR;
            return;
        }
        DecimalFormat* decFmt = new DecimalFormat(body, *sym, status);
        /* test for NULL */
        if (decFmt == 0) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        if (U_FAILURE(status)) {
            // Construction failed: discard the format; numberFormat stays NULL.
            delete decFmt;
            return;
        }
        this->numberFormat = decFmt;
        // this->numberFormat->setDecimalFormatSymbols(formatter->getDecimalFormatSymbols());
        return;
    }

    // ">>>": this substitution bypasses the usual rule-search process and
    // always uses the rule that precedes it in its own rule set's rule list
    // (used for place-value notations, where a particular part of a number
    // should be shown even when it's 0).
    if (lead == gGreaterThan) {
        // Setting the rule set to NULL here causes problems when >>> is used
        // in a fractional-part substitution, so the owning rule set is kept.
        // this->ruleSet = NULL;
        this->ruleSet = _ruleSet;
        this->numberFormat = NULL;
        return;
    }

    // None of the above: it's a syntax error.
    // throw new IllegalArgumentException("Illegal substitution syntax");
    status = U_PARSE_ERROR;
}
std::string GlobalizationNDK::getCurrencyPattern(const std::string& args) { if (args.empty()) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getCurrencyPattern: no arguments provided!"); return errorInJson(UNKNOWN_ERROR, "No arguments provided!"); } Json::Reader reader; Json::Value root; bool parse = reader.parse(args, root); if (!parse) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getCurrencyPattern: invalid json data: %s", args.c_str()); return errorInJson(PARSING_ERROR, "Invalid json data!"); } Json::Value ccv = root["currencyCode"]; if (ccv.isNull()) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getCurrencyPattern: no currencyCode provided!"); return errorInJson(FORMATTING_ERROR, "No currencyCode provided!"); } if (!ccv.isString()) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getCurrencyPattern: invalid currencyCode type: %d!", ccv.type()); return errorInJson(FORMATTING_ERROR, "Invalid currencyCode type!"); } std::string cc = ccv.asString(); if (cc.empty()) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getCurrencyPattern: empty currencyCode!"); return errorInJson(FORMATTING_ERROR, "Empty currencyCode!"); } UnicodeString ucc = UnicodeString::fromUTF8(cc); DecimalFormat* df = 0; int count = 0; const Locale* locs = Locale::getAvailableLocales(count); for (int i = 0; i < count; ++i) { UErrorCode status = U_ZERO_ERROR; NumberFormat* nf = NumberFormat::createCurrencyInstance(*(locs + i), status); if (!nf) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getCurrencyPattern: locale %d: unable to get NumberFormat instance!", i); continue; } std::auto_ptr<NumberFormat> ndeleter(nf); const UChar* currency = nf->getCurrency(); if (!currency) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getCurrencyPattern: locale %d: failed to getCurrency!", i); continue; } if (!ucc.compare(currency, -1)) { df = (DecimalFormat*) ndeleter.release(); break; } } if (!df) return errorInJson(UNKNOWN_ERROR, "Currency not supported!"); 
std::auto_ptr<DecimalFormat> deleter(df); const DecimalFormatSymbols* dfs = df->getDecimalFormatSymbols(); if (!dfs) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getCurrencyPattern: unable to get DecimalFormatSymbols!"); return errorInJson(UNKNOWN_ERROR, "Failed to get DecimalFormatSymbols!"); } UnicodeString ucs; std::string pattern; df->toPattern(ucs); ucs.toUTF8String(pattern); ucs.remove(); int fraction = df->getMaximumFractionDigits(); double rounding = df->getRoundingIncrement(); std::string decimal; ucs = dfs->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol); ucs.toUTF8String(decimal); ucs.remove(); std::string grouping; ucs = dfs->getSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol); ucs.toUTF8String(grouping); ucs.remove(); return resultInJson(pattern, cc, fraction, rounding, decimal, grouping); }
/**
 * Second half of the two-step instantiation of a rule-based entry: folds the
 * results of a finished parse into the registry entry for ID and then
 * instantiates the entry.
 *
 * @param ID          ID of the registry entry to fix up
 * @param parser      parser that has already parsed the entry's rules; its
 *                    data vector and compound filter may be orphaned here
 * @param aliasReturn must be NULL on entry; passed through to instantiateEntry()
 * @param status      error code; set to U_MEMORY_ALLOCATION_ERROR if the
 *                    compound data vector cannot be allocated
 * @return the result of instantiateEntry(), or 0 if the ID is no longer
 *         registered or the compound data vector could not be created
 */
Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID,
                                              TransliteratorParser& parser,
                                              TransliteratorAlias*& aliasReturn,
                                              UErrorCode& status)
{
    U_ASSERT(aliasReturn == NULL);
    TransliteratorEntry *entry = find(ID);

    if (entry == 0) {
        // We get to this point if there are two threads, one of which
        // is instantiating an ID, and another of which is removing
        // the same ID from the registry, and the timing is just right.
        return 0;
    }

    // The usage model for the caller is that they will first call
    // reg->get() inside the mutex, they'll get back an alias, they call
    // alias->isRuleBased(), and if they get TRUE, they call alias->parse()
    // outside the mutex, then reg->reget() inside the mutex again.  A real
    // mess, but it gets things working for ICU 3.0. [alan].

    // Note: It's possible that in between the caller calling
    // alias->parse() and reg->reget(), that another thread will have
    // called reg->reget(), and the entry will already have been fixed up.
    // We have to detect this so we don't stomp over existing entry
    // data members and potentially leak memory (u.data and compoundFilter).

    if (entry->entryType == TransliteratorEntry::RULES_FORWARD ||
        entry->entryType == TransliteratorEntry::RULES_REVERSE ||
        entry->entryType == TransliteratorEntry::LOCALE_RULES) {

        if (parser.idBlockVector.isEmpty() && parser.dataVector.isEmpty()) {
            entry->u.data = 0;
            entry->entryType = TransliteratorEntry::ALIAS;
            entry->stringArg = UNICODE_STRING_SIMPLE("Any-NULL");
        }
        else if (parser.idBlockVector.isEmpty() && parser.dataVector.size() == 1) {
            entry->u.data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
            entry->entryType = TransliteratorEntry::RBT_DATA;
        }
        else if (parser.idBlockVector.size() == 1 && parser.dataVector.isEmpty()) {
            entry->stringArg = *(UnicodeString*)(parser.idBlockVector.elementAt(0));
            entry->compoundFilter = parser.orphanCompoundFilter();
            entry->entryType = TransliteratorEntry::ALIAS;
        }
        else {
            // Create and validate the vector BEFORE modifying the entry, so
            // an allocation failure neither dereferences NULL below nor
            // leaves the entry marked COMPOUND_RBT without usable data.
            UVector* dataVector = new UVector(status);
            if (dataVector == 0) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return 0;
            }
            if (U_FAILURE(status)) {
                delete dataVector;
                return 0;
            }

            entry->entryType = TransliteratorEntry::COMPOUND_RBT;
            entry->compoundFilter = parser.orphanCompoundFilter();
            entry->u.dataVector = dataVector;
            entry->stringArg.remove();

            int32_t limit = parser.idBlockVector.size();
            if (parser.dataVector.size() > limit)
                limit = parser.dataVector.size();

            for (int32_t i = 0; i < limit; i++) {
                if (i < parser.idBlockVector.size()) {
                    UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i);
                    if (!idBlock->isEmpty())
                        entry->stringArg += *idBlock;
                }
                if (!parser.dataVector.isEmpty()) {
                    TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
                    entry->u.dataVector->addElement(data, status);
                    entry->stringArg += (UChar)0xffff;  // use U+FFFF to mark position of RBTs in ID block
                }
            }
        }
    }

    Transliterator *t =
        instantiateEntry(ID, entry, aliasReturn, status);
    return t;
}
/**
 * Looks in the resource bundle of specToOpen for transliteration rules that
 * match specToFind, and wraps them in a new LOCALE_RULES registry entry.
 *
 * Two resource tags are tried in order: the unidirectional
 * TRANSLITERATE_TO / TRANSLITERATE_FROM tag (chosen by direction), then the
 * bidirectional TRANSLITERATE tag.
 *
 * @param specToOpen spec whose bundle is searched
 * @param specToFind spec whose upper-cased name is appended to the tag
 * @param variant    variant to fetch; if empty, the item at index 1 is used
 * @param direction  requested direction
 * @return a newly allocated entry, or NULL if nothing matched or the
 *         allocation failed
 */
TransliteratorEntry* TransliteratorRegistry::findInBundle(const TransliteratorSpec& specToOpen,
                                                          const TransliteratorSpec& specToFind,
                                                          const UnicodeString& variant,
                                                          UTransDirection direction)
{
    UnicodeString rules;
    int32_t attempt;

    for (attempt = 0; attempt < 2; ++attempt) {
        // First try either TransliterateTo_xxx or TransliterateFrom_xxx,
        // then try the bidirectional Transliterate_xxx. This precedence
        // order is arbitrary but must be consistent and documented.
        UnicodeString resourceKey;
        if (attempt == 0) {
            resourceKey.append(direction == UTRANS_FORWARD ?
                               TRANSLITERATE_TO : TRANSLITERATE_FROM);
        } else {
            resourceKey.append(TRANSLITERATE);
        }
        UnicodeString upperSpec(specToFind.get());
        upperSpec.toUpper("");
        resourceKey.append(upperSpec);
        CharString tag(resourceKey);

        UErrorCode status = U_ZERO_ERROR;
        ResourceBundle subres(specToOpen.getBundle().get(tag, status));
        if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) {
            continue;
        }

        // Skip the bundle unless its locale name equals the spec that was opened.
        UnicodeString bundleLocaleName;
        if (specToOpen.get() != LocaleUtility::initNameFromLocale(subres.getLocale(), bundleLocaleName)) {
            continue;
        }

        status = U_ZERO_ERROR;
        if (variant.length() != 0) {
            CharString var(variant);
            rules = subres.getStringEx(var, status);
        } else {
            // Variant is empty, which means match the first variant listed.
            rules = subres.getStringEx(1, status);
        }
        if (U_SUCCESS(status)) {
            // Exit loop successfully
            break;
        }
    }

    if (attempt == 2) {
        // Both tags failed.
        return NULL;
    }

    // We have succeeded in loading a string from the locale resources.
    // Create a new registry entry to hold it and return it.
    TransliteratorEntry *entry = new TransliteratorEntry();
    if (entry == 0) {
        return entry;
    }

    // The direction is always forward for the TransliterateTo_xxx and
    // TransliterateFrom_xxx items; those are unidirectional forward rules.
    // For the bidirectional Transliterate_xxx items, the direction is the
    // value passed in to this function.
    int32_t dir;
    if (attempt == 0) {
        dir = UTRANS_FORWARD;
    } else {
        dir = direction;
    }
    entry->entryType = TransliteratorEntry::LOCALE_RULES;
    entry->stringArg = rules;
    entry->intArg = dir;

    return entry;
}
/** * Implements {@link Transliterator#handleTransliterate}. */ void TitlecaseTransliterator::handleTransliterate( Replaceable& text, UTransPosition& offsets, UBool isIncremental) const { // TODO reimplement, see ustrcase.c // using a real word break iterator // instead of just looking for a transition between cased and uncased characters // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap) // needs to take isIncremental into account because case mappings are context-sensitive // also detect when lowercasing function did not finish because of context if (offsets.start >= offsets.limit) { return; } // case type: >0 cased (UCASE_LOWER etc.) ==0 uncased <0 case-ignorable int32_t type; // Our mode; we are either converting letter toTitle or // toLower. UBool doTitle = TRUE; // Determine if there is a preceding context of cased case-ignorable*, // in which case we want to start in toLower mode. If the // prior context is anything else (including empty) then start // in toTitle mode. UChar32 c; int32_t start; for (start = offsets.start - 1; start >= offsets.contextStart; start -= U16_LENGTH(c)) { c = text.char32At(start); type=ucase_getTypeOrIgnorable(c); if(type>0) { // cased doTitle=FALSE; break; } else if(type==0) { // uncased but not ignorable break; } // else (type<0) case-ignorable: continue } // Convert things after a cased character toLower; things // after an uncased, non-case-ignorable character toTitle. Case-ignorable // characters are copied directly and do not change the mode. 
UCaseContext csc; uprv_memset(&csc, 0, sizeof(csc)); csc.p = &text; csc.start = offsets.contextStart; csc.limit = offsets.contextLimit; UnicodeString tmp; const UChar *s; int32_t textPos, delta, result; for(textPos=offsets.start; textPos<offsets.limit;) { csc.cpStart=textPos; c=text.char32At(textPos); csc.cpLimit=textPos+=U16_LENGTH(c); type=ucase_getTypeOrIgnorable(c); if(type>=0) { // not case-ignorable if(doTitle) { result=ucase_toFullTitle(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT); } else { result=ucase_toFullLower(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT); } doTitle = (UBool)(type==0); // doTitle=isUncased if(csc.b1 && isIncremental) { // fMap() tried to look beyond the context limit // wait for more input offsets.start=csc.cpStart; return; } if(result>=0) { // replace the current code point with its full case mapping result // see UCASE_MAX_STRING_LENGTH if(result<=UCASE_MAX_STRING_LENGTH) { // string s[result] tmp.setTo(FALSE, s, result); delta=result-U16_LENGTH(c); } else { // single code point tmp.setTo(result); delta=tmp.length()-U16_LENGTH(c); } text.handleReplaceBetween(csc.cpStart, textPos, tmp); if(delta!=0) { textPos+=delta; csc.limit=offsets.contextLimit+=delta; offsets.limit+=delta; } } } } offsets.start=textPos; }
std::string GlobalizationNDK::getDateNames(const std::string& args) { ENamesType type = kNamesWide; ENamesItem item = kNamesMonths; if (!args.empty()) { Json::Reader reader; Json::Value root; bool parse = reader.parse(args, root); if (!parse) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getDateNames: invalid json data: %s", args.c_str()); return errorInJson(PARSING_ERROR, "Parameters not valid json format!"); } Json::Value options = root["options"]; std::string error; if (!handleNamesOptions(options, type, item, error)) return errorInJson(PARSING_ERROR, error); } int count; const char* pattern; DateFormat::EStyle dstyle; // Check ICU SimpleDateFormat document for patterns for months and days. // http://www.icu-project.org/apiref/icu4c/classicu_1_1SimpleDateFormat.html if (item == kNamesMonths) { count = 12; if (type == kNamesWide) { dstyle = DateFormat::kLong; pattern = "MMMM"; } else { dstyle = DateFormat::kShort; pattern = "MMM"; } } else { count = 7; if (type == kNamesWide) { dstyle = DateFormat::kLong; pattern = "eeee"; } else { dstyle = DateFormat::kShort; pattern = "eee"; } } UErrorCode status = U_ZERO_ERROR; const Locale& loc = Locale::getDefault(); DateFormat* df = DateFormat::createDateInstance(dstyle, loc); if (!df) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getDateNames: unable to create DateFormat instance!"); return errorInJson(UNKNOWN_ERROR, "Unable to create DateFormat instance!"); } std::auto_ptr<DateFormat> deleter(df); if (df->getDynamicClassID() != SimpleDateFormat::getStaticClassID()) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getDateNames: DateFormat instance not SimpleDateFormat!"); return errorInJson(UNKNOWN_ERROR, "DateFormat instance not SimpleDateFormat!"); } SimpleDateFormat* sdf = (SimpleDateFormat*) df; sdf->applyLocalizedPattern(UnicodeString(pattern, -1), status); Calendar* cal = Calendar::createInstance(status); if (!cal) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getDateNames: unable to create 
Calendar instance: %x.", status); return errorInJson(UNKNOWN_ERROR, "Unable to create Calendar instance!"); } std::auto_ptr<Calendar> caldeleter(cal); UCalendarDaysOfWeek ud = cal->getFirstDayOfWeek(status); if (status != U_ZERO_ERROR && status != U_ERROR_WARNING_START) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getDateNames: failed to getFirstDayOfWeek: %d!", status); return errorInJson(PARSING_ERROR, "Failed to getFirstDayOfWeek!"); } if (ud == UCAL_SUNDAY) cal->set(2014, 0, 5); else cal->set(2014, 0, 6); std::list<std::string> utf8Names; for (int i = 0; i < count; ++i) { UnicodeString ucs; sdf->format(cal->getTime(status), ucs); if (item == kNamesMonths) cal->add(UCAL_MONTH, 1, status); else cal->add(UCAL_DAY_OF_MONTH, 1, status); if (ucs.isEmpty()) continue; std::string utf8; ucs.toUTF8String(utf8); utf8Names.push_back(utf8); } if (!utf8Names.size()) { slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getDateNames: unable to get symbols: item: %d, type: %d.", item, type); return errorInJson(UNKNOWN_ERROR, "Unable to get symbols!"); } return resultInJson(utf8Names); }