Exemplo n.º 1
0
 virtual int32_t
 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
     return s.length();
 }
Exemplo n.º 2
0
UBool TransliteratorSpec::hasFallback() const {
    return nextSpec.length() != 0;
}
Exemplo n.º 3
0
Transliterator* TransliteratorAlias::create(UParseError& pe,
                                            UErrorCode& ec) {
    if (U_FAILURE(ec)) {
        return 0;
    }
    Transliterator *t = NULL;
    switch (type) {
    case SIMPLE:
        t = Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec);
        if(U_FAILURE(ec)){
            return 0;
        }
        if (compoundFilter != 0)
            t->adoptFilter((UnicodeSet*)compoundFilter->clone());
        break;
    case COMPOUND:
        {
            // the total number of transliterators in the compound is the total number of anonymous transliterators
            // plus the total number of ID blocks-- we start by assuming the list begins and ends with an ID
            // block and that each pair anonymous transliterators has an ID block between them.  Then we go back
            // to see whether there really are ID blocks at the beginning and end (by looking for U+FFFF, which
            // marks the position where an anonymous transliterator goes) and adjust accordingly
            int32_t anonymousRBTs = transes->size();
            int32_t transCount = anonymousRBTs * 2 + 1;
            if (!aliasesOrRules.isEmpty() && aliasesOrRules[0] == (UChar)(0xffff))
                --transCount;
            if (aliasesOrRules.length() >= 2 && aliasesOrRules[aliasesOrRules.length() - 1] == (UChar)(0xffff))
                --transCount;
            UnicodeString noIDBlock((UChar)(0xffff));
            noIDBlock += ((UChar)(0xffff));
            int32_t pos = aliasesOrRules.indexOf(noIDBlock);
            while (pos >= 0) {
                --transCount;
                pos = aliasesOrRules.indexOf(noIDBlock, pos + 1);
            }

            UVector transliterators(ec);
            UnicodeString idBlock;
            int32_t blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff));
            while (blockSeparatorPos >= 0) {
                aliasesOrRules.extract(0, blockSeparatorPos, idBlock);
                aliasesOrRules.remove(0, blockSeparatorPos + 1);
                if (!idBlock.isEmpty())
                    transliterators.addElement(Transliterator::createInstance(idBlock, UTRANS_FORWARD, pe, ec), ec);
                if (!transes->isEmpty())
                    transliterators.addElement(transes->orphanElementAt(0), ec);
                blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff));
            }
            if (!aliasesOrRules.isEmpty())
                transliterators.addElement(Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec), ec);
            while (!transes->isEmpty())
                transliterators.addElement(transes->orphanElementAt(0), ec);

            if (U_SUCCESS(ec)) {
                t = new CompoundTransliterator(ID, transliterators,
                    (compoundFilter ? (UnicodeSet*)(compoundFilter->clone()) : 0),
                    anonymousRBTs, pe, ec);
                if (t == 0) {
                    ec = U_MEMORY_ALLOCATION_ERROR;
                    return 0;
                }
            } else {
                for (int32_t i = 0; i < transliterators.size(); i++)
                    delete (Transliterator*)(transliterators.elementAt(i));
            }
        }
        break;
    case RULES:
        U_ASSERT(FALSE); // don't call create() if isRuleBased() returns TRUE!
        break;
    }
    return t;
}
Exemplo n.º 4
0
/**
 * Parse the pattern from the given RuleCharacterIterator.  The
 * iterator is advanced over the parsed pattern.
 * @param chars iterator over the pattern characters.  Upon return
 * it will be advanced to the first character after the parsed
 * pattern, or the end of the iteration if all characters are
 * parsed.
 * @param symbols symbol table to use to parse and dereference
 * variables, or null if none.
 * @param rebuiltPat the pattern that was parsed, rebuilt or
 * copied from the input pattern, as appropriate.
 * @param options a bit mask of zero or more of the following:
 * IGNORE_SPACE, CASE.
 */
void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
                              const SymbolTable* symbols,
                              UnicodeString& rebuiltPat,
                              uint32_t options,
                              UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
                              UErrorCode& ec) {
    if (U_FAILURE(ec)) return;

    // Syntax characters: [ ] ^ - & { }

    // Recognized special forms for chars, sets: c-c s-s s&s

    int32_t opts = RuleCharacterIterator::PARSE_VARIABLES |
                   RuleCharacterIterator::PARSE_ESCAPES;
    if ((options & USET_IGNORE_SPACE) != 0) {
        opts |= RuleCharacterIterator::SKIP_WHITESPACE;
    }

    UnicodeString patLocal, buf;
    UBool usePat = FALSE;
    UnicodeSetPointer scratch;
    RuleCharacterIterator::Pos backup;

    // mode: 0=before [, 1=between [...], 2=after ]
    // lastItem: 0=none, 1=char, 2=set
    int8_t lastItem = 0, mode = 0;
    UChar32 lastChar = 0;
    UChar op = 0;

    UBool invert = FALSE;

    clear();

    while (mode != 2 && !chars.atEnd()) {
        U_ASSERT((lastItem == 0 && op == 0) ||
                 (lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) ||
                 (lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ ||
                                    op == INTERSECTION /*'&'*/)));

        UChar32 c = 0;
        UBool literal = FALSE;
        UnicodeSet* nested = 0; // alias - do not delete

        // -------- Check for property pattern

        // setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed
        int8_t setMode = 0;
        if (resemblesPropertyPattern(chars, opts)) {
            setMode = 2;
        }

        // -------- Parse '[' of opening delimiter OR nested set.
        // If there is a nested set, use `setMode' to define how
        // the set should be parsed.  If the '[' is part of the
        // opening delimiter for this pattern, parse special
        // strings "[", "[^", "[-", and "[^-".  Check for stand-in
        // characters representing a nested set in the symbol
        // table.

        else {
            // Prepare to backup if necessary
            chars.getPos(backup);
            c = chars.next(opts, literal, ec);
            if (U_FAILURE(ec)) return;

            if (c == 0x5B /*'['*/ && !literal) {
                if (mode == 1) {
                    chars.setPos(backup); // backup
                    setMode = 1;
                } else {
                    // Handle opening '[' delimiter
                    mode = 1;
                    patLocal.append((UChar) 0x5B /*'['*/);
                    chars.getPos(backup); // prepare to backup
                    c = chars.next(opts, literal, ec);
                    if (U_FAILURE(ec)) return;
                    if (c == 0x5E /*'^'*/ && !literal) {
                        invert = TRUE;
                        patLocal.append((UChar) 0x5E /*'^'*/);
                        chars.getPos(backup); // prepare to backup
                        c = chars.next(opts, literal, ec);
                        if (U_FAILURE(ec)) return;
                    }
                    // Fall through to handle special leading '-';
                    // otherwise restart loop for nested [], \p{}, etc.
                    if (c == HYPHEN /*'-'*/) {
                        literal = TRUE;
                        // Fall through to handle literal '-' below
                    } else {
                        chars.setPos(backup); // backup
                        continue;
                    }
                }
            } else if (symbols != 0) {
                const UnicodeFunctor *m = symbols->lookupMatcher(c);
                if (m != 0) {
                    const UnicodeSet *ms = dynamic_cast<const UnicodeSet *>(m);
                    if (ms == NULL) {
                        ec = U_MALFORMED_SET;
                        return;
                    }
                    // casting away const, but `nested' won't be modified
                    // (important not to modify stored set)
                    nested = const_cast<UnicodeSet*>(ms);
                    setMode = 3;
                }
            }
        }

        // -------- Handle a nested set.  This either is inline in
        // the pattern or represented by a stand-in that has
        // previously been parsed and was looked up in the symbol
        // table.

        if (setMode != 0) {
            if (lastItem == 1) {
                if (op != 0) {
                    // syntaxError(chars, "Char expected after operator");
                    ec = U_MALFORMED_SET;
                    return;
                }
                add(lastChar, lastChar);
                _appendToPat(patLocal, lastChar, FALSE);
                lastItem = 0;
                op = 0;
            }

            if (op == HYPHEN /*'-'*/ || op == INTERSECTION /*'&'*/) {
                patLocal.append(op);
            }

            if (nested == 0) {
                // lazy allocation
                if (!scratch.allocate()) {
                    ec = U_MEMORY_ALLOCATION_ERROR;
                    return;
                }
                nested = scratch.pointer();
            }
            switch (setMode) {
            case 1:
                nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec);
                break;
            case 2:
                chars.skipIgnored(opts);
                nested->applyPropertyPattern(chars, patLocal, ec);
                if (U_FAILURE(ec)) return;
                break;
            case 3: // `nested' already parsed
                nested->_toPattern(patLocal, FALSE);
                break;
            }

            usePat = TRUE;

            if (mode == 0) {
                // Entire pattern is a category; leave parse loop
                *this = *nested;
                mode = 2;
                break;
            }

            switch (op) {
            case HYPHEN: /*'-'*/
                removeAll(*nested);
                break;
            case INTERSECTION: /*'&'*/
                retainAll(*nested);
                break;
            case 0:
                addAll(*nested);
                break;
            }

            op = 0;
            lastItem = 2;

            continue;
        }

        if (mode == 0) {
            // syntaxError(chars, "Missing '['");
            ec = U_MALFORMED_SET;
            return;
        }

        // -------- Parse special (syntax) characters.  If the
        // current character is not special, or if it is escaped,
        // then fall through and handle it below.

        if (!literal) {
            switch (c) {
            case 0x5D /*']'*/:
                if (lastItem == 1) {
                    add(lastChar, lastChar);
                    _appendToPat(patLocal, lastChar, FALSE);
                }
                // Treat final trailing '-' as a literal
                if (op == HYPHEN /*'-'*/) {
                    add(op, op);
                    patLocal.append(op);
                } else if (op == INTERSECTION /*'&'*/) {
                    // syntaxError(chars, "Trailing '&'");
                    ec = U_MALFORMED_SET;
                    return;
                }
                patLocal.append((UChar) 0x5D /*']'*/);
                mode = 2;
                continue;
            case HYPHEN /*'-'*/:
                if (op == 0) {
                    if (lastItem != 0) {
                        op = (UChar) c;
                        continue;
                    } else {
                        // Treat final trailing '-' as a literal
                        add(c, c);
                        c = chars.next(opts, literal, ec);
                        if (U_FAILURE(ec)) return;
                        if (c == 0x5D /*']'*/ && !literal) {
                            patLocal.append(HYPHEN_RIGHT_BRACE, 2);
                            mode = 2;
                            continue;
                        }
                    }
                }
                // syntaxError(chars, "'-' not after char or set");
                ec = U_MALFORMED_SET;
                return;
            case INTERSECTION /*'&'*/:
                if (lastItem == 2 && op == 0) {
                    op = (UChar) c;
                    continue;
                }
                // syntaxError(chars, "'&' not after set");
                ec = U_MALFORMED_SET;
                return;
            case 0x5E /*'^'*/:
                // syntaxError(chars, "'^' not after '['");
                ec = U_MALFORMED_SET;
                return;
            case 0x7B /*'{'*/:
                if (op != 0) {
                    // syntaxError(chars, "Missing operand after operator");
                    ec = U_MALFORMED_SET;
                    return;
                }
                if (lastItem == 1) {
                    add(lastChar, lastChar);
                    _appendToPat(patLocal, lastChar, FALSE);
                }
                lastItem = 0;
                buf.truncate(0);
                {
                    UBool ok = FALSE;
                    while (!chars.atEnd()) {
                        c = chars.next(opts, literal, ec);
                        if (U_FAILURE(ec)) return;
                        if (c == 0x7D /*'}'*/ && !literal) {
                            ok = TRUE;
                            break;
                        }
                        buf.append(c);
                    }
                    if (buf.length() < 1 || !ok) {
                        // syntaxError(chars, "Invalid multicharacter string");
                        ec = U_MALFORMED_SET;
                        return;
                    }
                }
                // We have new string. Add it to set and continue;
                // we don't need to drop through to the further
                // processing
                add(buf);
                patLocal.append((UChar) 0x7B /*'{'*/);
                _appendToPat(patLocal, buf, FALSE);
                patLocal.append((UChar) 0x7D /*'}'*/);
                continue;
            case SymbolTable::SYMBOL_REF:
                //         symbols  nosymbols
                // [a-$]   error    error (ambiguous)
                // [a$]    anchor   anchor
                // [a-$x]  var "x"* literal '$'
                // [a-$.]  error    literal '$'
                // *We won't get here in the case of var "x"
            {
                chars.getPos(backup);
                c = chars.next(opts, literal, ec);
                if (U_FAILURE(ec)) return;
                UBool anchor = (c == 0x5D /*']'*/ && !literal);
                if (symbols == 0 && !anchor) {
                    c = SymbolTable::SYMBOL_REF;
                    chars.setPos(backup);
                    break; // literal '$'
                }
                if (anchor && op == 0) {
                    if (lastItem == 1) {
                        add(lastChar, lastChar);
                        _appendToPat(patLocal, lastChar, FALSE);
                    }
                    add(U_ETHER);
                    usePat = TRUE;
                    patLocal.append((UChar) SymbolTable::SYMBOL_REF);
                    patLocal.append((UChar) 0x5D /*']'*/);
                    mode = 2;
                    continue;
                }
                // syntaxError(chars, "Unquoted '$'");
                ec = U_MALFORMED_SET;
                return;
            }
            default:
                break;
            }
        }

        // -------- Parse literal characters.  This includes both
        // escaped chars ("\u4E01") and non-syntax characters
        // ("a").

        switch (lastItem) {
        case 0:
            lastItem = 1;
            lastChar = c;
            break;
        case 1:
            if (op == HYPHEN /*'-'*/) {
                if (lastChar >= c) {
                    // Don't allow redundant (a-a) or empty (b-a) ranges;
                    // these are most likely typos.
                    // syntaxError(chars, "Invalid range");
                    ec = U_MALFORMED_SET;
                    return;
                }
                add(lastChar, c);
                _appendToPat(patLocal, lastChar, FALSE);
                patLocal.append(op);
                _appendToPat(patLocal, c, FALSE);
                lastItem = 0;
                op = 0;
            } else {
                add(lastChar, lastChar);
                _appendToPat(patLocal, lastChar, FALSE);
                lastChar = c;
            }
            break;
        case 2:
            if (op != 0) {
                // syntaxError(chars, "Set expected after operator");
                ec = U_MALFORMED_SET;
                return;
            }
            lastChar = c;
            lastItem = 1;
            break;
        }
    }

    if (mode != 2) {
        // syntaxError(chars, "Missing ']'");
        ec = U_MALFORMED_SET;
        return;
    }

    chars.skipIgnored(opts);

    /**
     * Handle global flags (invert, case insensitivity).  If this
     * pattern should be compiled case-insensitive, then we need
     * to close over case BEFORE COMPLEMENTING.  This makes
     * patterns like /[^abc]/i work.
     */
    if ((options & USET_CASE_INSENSITIVE) != 0) {
        (this->*caseClosure)(USET_CASE_INSENSITIVE);
    }
    else if ((options & USET_ADD_CASE_MAPPINGS) != 0) {
        (this->*caseClosure)(USET_ADD_CASE_MAPPINGS);
    }
    if (invert) {
        complement();
    }

    // Use the rebuilt pattern (patLocal) only if necessary.  Prefer the
    // generated pattern.
    if (usePat) {
        rebuiltPat.append(patLocal);
    } else {
        _generatePattern(rebuiltPat, FALSE);
    }
    if (isBogus() && U_SUCCESS(ec)) {
        // We likely ran out of memory. AHHH!
        ec = U_MEMORY_ALLOCATION_ERROR;
    }
}
Exemplo n.º 5
0
/**
 * If in "by digits" mode, fills in the substitution one decimal digit
 * at a time using the rule set containing this substitution.
 * Otherwise, uses the superclass function.
 * @param number The number being formatted
 * @param toInsertInto The string to insert the result of formatting
 * the substitution into
 * @param pos The position of the owning rule's rule text in
 * toInsertInto
 */
void
FractionalPartSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32_t _pos) const
{
  // if we're not in "byDigits" mode, just use the inherited
  // doSubstitution() routine
  if (!byDigits) {
    NFSubstitution::doSubstitution(number, toInsertInto, _pos);

    // if we're in "byDigits" mode, transform the value into an integer
    // by moving the decimal point eight places to the right and
    // pulling digits off the right one at a time, formatting each digit
    // as an integer using this substitution's owning rule set
    // (this is slower, but more accurate, than doing it from the
    // other end)
  } else {
    //          int32_t numberToFormat = (int32_t)uprv_round(transformNumber(number) * uprv_pow(10, kMaxDecimalDigits));
    //          // this flag keeps us from formatting trailing zeros.  It starts
    //          // out false because we're pulling from the right, and switches
    //          // to true the first time we encounter a non-zero digit
    //          UBool doZeros = FALSE;
    //          for (int32_t i = 0; i < kMaxDecimalDigits; i++) {
    //              int64_t digit = numberToFormat % 10;
    //              if (digit != 0 || doZeros) {
    //                  if (doZeros && useSpaces) {
    //                      toInsertInto.insert(_pos + getPos(), gSpace);
    //                  }
    //                  doZeros = TRUE;
    //                  getRuleSet()->format(digit, toInsertInto, _pos + getPos());
    //              }
    //              numberToFormat /= 10;
    //          }

    DigitList dl;
    dl.set(number, 20, TRUE);
    
    UBool pad = FALSE;
    while (dl.fCount > (dl.fDecimalAt <= 0 ? 0 : dl.fDecimalAt)) {
      if (pad && useSpaces) {
        toInsertInto.insert(_pos + getPos(), gSpace);
      } else {
        pad = TRUE;
      }
      getRuleSet()->format((int64_t)(dl.fDigits[--dl.fCount] - '0'), toInsertInto, _pos + getPos());
    }
    while (dl.fDecimalAt < 0) {
      if (pad && useSpaces) {
        toInsertInto.insert(_pos + getPos(), gSpace);
      } else {
        pad = TRUE;
      }
      getRuleSet()->format((int64_t)0, toInsertInto, _pos + getPos());
      ++dl.fDecimalAt;
    }

    if (!pad) {
      // hack around lack of precision in digitlist. if we would end up with
      // "foo point" make sure we add a " zero" to the end.
      getRuleSet()->format((int64_t)0, toInsertInto, _pos + getPos());
    }
  }
}
Exemplo n.º 6
0
static inline UBool
isPerlOpen(const UnicodeString &pattern, int32_t pos) {
    UChar c;
    return pattern.charAt(pos)==BACKSLASH && ((c=pattern.charAt(pos+1))==LOWER_P || c==UPPER_P);
}
Exemplo n.º 7
0
static inline UBool
isPOSIXOpen(const UnicodeString &pattern, int32_t pos) {
    return pattern.charAt(pos)==SET_OPEN && pattern.charAt(pos+1)==COLON;
}
Exemplo n.º 8
0
void printUnicodeString(const UnicodeString &s) {
    char charBuf[1000];
    s.extract(0, s.length(), charBuf, sizeof(charBuf)-1, 0);   
    charBuf[sizeof(charBuf)-1] = 0;          
    printf("%s", charBuf);
}
Exemplo n.º 9
0
/**
 * Append c to buf, unless buf is empty or buf already ends in c.
 */
static void _smartAppend(UnicodeString& buf, UChar c) {
    if (buf.length() != 0 &&
        buf.charAt(buf.length() - 1) != c) {
        buf.append(c);
    }
}
Exemplo n.º 10
0
BOOST_FIXTURE_TEST_CASE(test11, base_fixture_t)
{
    // Тесты на ::FmtLoadStr FMTLOAD ::Format ::LoadStr ::LoadStrPart ::CutToChar ::TrimLeft ::TrimRight
    {
        UnicodeString str = ::FmtLoadStr(CONST_TEST_STRING, L"lalala", 42);
        // BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
        // BOOST_TEST_MESSAGE("length = " << str.size());
        BOOST_CHECK(W2MB(str.c_str()) == "test string: \"lalala\" 42");
    }
    {
        UnicodeString str2 = FMTLOAD(CONST_TEST_STRING, L"lalala", 42);
        // BOOST_TEST_MESSAGE("str2 = " << W2MB(str2.c_str()));
        BOOST_CHECK(W2MB(str2.c_str()) == "test string: \"lalala\" 42");
    }
    {
        UnicodeString str2 = ::Format(L"test: %s %d", L"lalala", 42);
        BOOST_TEST_MESSAGE("str2 = " << W2MB(str2.c_str()));
        BOOST_CHECK_EQUAL(0, wcscmp(str2.c_str(), L"test: lalala 42"));
    }
    {
        UnicodeString str3 = FORMAT(L"test: %s %d", L"lalala", 42);
        BOOST_TEST_MESSAGE("str3 = " << W2MB(str3.c_str()));
        BOOST_CHECK_EQUAL(0, wcscmp(str3.c_str(), L"test: lalala 42"));
    }
    {
        UnicodeString str = ::TrimLeft(L"");
        BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
        BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L""));
    }
    {
        UnicodeString str = ::TrimLeft(L"1");
        BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
        BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L"1"));
    }
    {
        UnicodeString str = ::TrimLeft(L" 1");
        BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
        BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L"1"));
    }
    {
        UnicodeString str = ::TrimRight(L"");
        BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
        BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L""));
    }
    {
        UnicodeString str = ::TrimRight(L"1");
        BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
        BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L"1"));
    }
    {
        UnicodeString str = ::TrimRight(L"1 ");
        BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
        BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L"1"));
    }
    {
        // UnicodeString CutToChar(UnicodeString &Str, char Ch, bool Trim)
        UnicodeString Str1 = L" part 1 | part 2 ";
        UnicodeString str1 = ::CutToChar(Str1, '|', false);
        BOOST_TEST_MESSAGE("str1 = '" << W2MB(str1.c_str()) << "'");
        BOOST_TEST_MESSAGE("Str1 = '" << W2MB(Str1.c_str()) << "'");
        // BOOST_TEST_MESSAGE("Str1 = '" << W2MB(Str1.c_str()) << "'");
        // DEBUG_PRINTF(L"str1 = \"%s\"", str1.c_str());
        BOOST_CHECK_EQUAL(0, wcscmp(str1.c_str(), L" part 1 "));

        UnicodeString str2 = ::CutToChar(Str1, '|', true);
        BOOST_TEST_MESSAGE("str2 = '" << W2MB(str2.c_str()) << "'");
        BOOST_TEST_MESSAGE("Str1 = '" << W2MB(Str1.c_str()) << "'");
        BOOST_CHECK_EQUAL(0, wcscmp(str2.c_str(), L" part 2"));
    }
    {
        UnicodeString str = ::LoadStr(CONST_TEST_STRING);
        BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
        BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L"test string: \"%s\" %d"));
    }
    {
        UnicodeString str = ::LoadStrPart(CONST_TEST_STRING2, 1);
        BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
        BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L"test string part 1"));
    }
    {
        UnicodeString str = ::LoadStrPart(CONST_TEST_STRING2, 2);
        BOOST_TEST_MESSAGE("str = " << W2MB(str.c_str()));
        BOOST_CHECK_EQUAL(0, wcscmp(str.c_str(), L"part 2"));
    }
}
Exemplo n.º 11
0
UHashtable*
ZoneMeta::createCanonicalMap(void) {
    UErrorCode status = U_ZERO_ERROR;

    UHashtable *canonicalMap = NULL;
    UResourceBundle *zoneFormatting = NULL;
    UResourceBundle *tzitem = NULL;
    UResourceBundle *aliases = NULL;

    StringEnumeration* tzenum = NULL;
    int32_t numZones;

    canonicalMap = uhash_open(uhash_hashUChars, uhash_compareUChars, NULL, &status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    // no key deleter
    uhash_setValueDeleter(canonicalMap, deleteCanonicalMapEntry);

    zoneFormatting = ures_openDirect(NULL, gSupplementalData, &status);
    zoneFormatting = ures_getByKey(zoneFormatting, gZoneFormattingTag, zoneFormatting, &status);
    if (U_FAILURE(status)) {
        goto error_cleanup;
    }

    while (ures_hasNext(zoneFormatting)) {
        tzitem = ures_getNextResource(zoneFormatting, tzitem, &status);
        if (U_FAILURE(status)) {
            status = U_ZERO_ERROR;
            continue;
        }
        if (ures_getType(tzitem) != URES_TABLE) {
            continue;
        }

        int32_t canonicalLen;
        const UChar *canonical = ures_getStringByKey(tzitem, gCanonicalTag, &canonicalLen, &status);
        if (U_FAILURE(status)) {
            status = U_ZERO_ERROR;
            continue;
        }

        int32_t territoryLen;
        const UChar *territory = ures_getStringByKey(tzitem, gTerritoryTag, &territoryLen, &status);
        if (U_FAILURE(status)) {
            territory = NULL;
            status = U_ZERO_ERROR;
        }

        // Create canonical map entry
        CanonicalMapEntry *entry = (CanonicalMapEntry*)uprv_malloc(sizeof(CanonicalMapEntry));
        if (entry == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            goto error_cleanup;
        }
        entry->id = canonical;
        if (territory == NULL || u_strcmp(territory, gWorld) == 0) {
            entry->country = NULL;
        } else {
            entry->country = territory;
        }

        // Put this entry in the hashtable. Since this hashtable has no key deleter,
        // key is treated as const, but must be passed as non-const.
        uhash_put(canonicalMap, (UChar*)canonical, entry, &status);
        if (U_FAILURE(status)) {
            goto error_cleanup;
        }

        // Get aliases
        aliases = ures_getByKey(tzitem, gAliasesTag, aliases, &status);
        if (U_FAILURE(status)) {
            // No aliases
            status = U_ZERO_ERROR;
            continue;
        }

        while (ures_hasNext(aliases)) {
            const UChar* alias = ures_getNextString(aliases, NULL, NULL, &status);
            if (U_FAILURE(status)) {
                status = U_ZERO_ERROR;
                continue;
            }
            // Create canonical map entry for this alias
            entry = (CanonicalMapEntry*)uprv_malloc(sizeof(CanonicalMapEntry));
            if (entry == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                goto error_cleanup;
            }
            entry->id = canonical;
            if (territory  == NULL || u_strcmp(territory, gWorld) == 0) {
                entry->country = NULL;
            } else {
                entry->country = territory;
            }

            // Put this entry in the hashtable. Since this hashtable has no key deleter,
            // key is treated as const, but must be passed as non-const.
            uhash_put(canonicalMap, (UChar*)alias, entry, &status);
            if (U_FAILURE(status)) {
                goto error_cleanup;
            }
        }
    }

    // Some available Olson zones are not included in CLDR data (such as Asia/Riyadh87).
    // Also, when we update Olson tzdata, new zones may be added.
    // This code scans all available zones in zoneinfo.res, and if any of them are
    // missing, add them to the map.
    tzenum = TimeZone::createEnumeration();
    numZones = tzenum->count(status);
    if (U_SUCCESS(status)) {
        int32_t i;
        for (i = 0; i < numZones; i++) {
            const UnicodeString *zone = tzenum->snext(status);
            if (U_FAILURE(status)) {
                // We should not get here.
                status = U_ZERO_ERROR;
                continue;
            }
            UChar zoneUChars[ZID_KEY_MAX];
            int32_t zoneUCharsLen = zone->extract(zoneUChars, ZID_KEY_MAX, status) + 1; // Add one for NUL termination
            if (U_FAILURE(status) || status==U_STRING_NOT_TERMINATED_WARNING) {
                status = U_ZERO_ERROR;
                continue; // zone id is too long to extract
            }
            CanonicalMapEntry *entry = (CanonicalMapEntry*)uhash_get(canonicalMap, zoneUChars);
            if (entry) {
                // Already included in CLDR data
                continue;
            }
            // Not in CLDR data, but it could be new one whose alias is available
            // in CLDR.
            int32_t nTzdataEquivalent = TimeZone::countEquivalentIDs(*zone);
            int32_t j;
            for (j = 0; j < nTzdataEquivalent; j++) {
                UnicodeString alias = TimeZone::getEquivalentID(*zone, j);
                if (alias == *zone) {
                    continue;
                }
                UChar aliasUChars[ZID_KEY_MAX];
                alias.extract(aliasUChars, ZID_KEY_MAX, status);
                if (U_FAILURE(status) || status==U_STRING_NOT_TERMINATED_WARNING) {
                    status = U_ZERO_ERROR;
                    continue; // zone id is too long to extract
                }
                entry = (CanonicalMapEntry*)uhash_get(canonicalMap, aliasUChars);
                if (entry != NULL) {
                    break;
                }
            }
            // Create a new map entry
            CanonicalMapEntry* newEntry = (CanonicalMapEntry*)uprv_malloc(sizeof(CanonicalMapEntry));
            int32_t idLen;
            if (newEntry == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                goto error_cleanup;
            }
            if (entry == NULL) {
                // Set dereferenced zone ID as the canonical ID
                UnicodeString derefZone;
                TimeZone::dereferOlsonLink(*zone, derefZone);
                if (derefZone.length() == 0) {
                    // It should never happen.. but just in case
                    derefZone = *zone;
                }
                idLen = derefZone.length() + 1;
                newEntry->id = allocUStringInTable(idLen);
                if (newEntry->id == NULL) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    uprv_free(newEntry);
                    goto error_cleanup;
                }
                // Copy NULL terminated string
                derefZone.extract((UChar*)(newEntry->id), idLen, status);
                if (U_FAILURE(status)) {
                    removeLastUStringFromTable();
                    uprv_free(newEntry);
                    goto error_cleanup;
                }
                // No territory information available
                newEntry->country = NULL;
            } else {
                // Duplicate the entry
                newEntry->id = entry->id;
                newEntry->country = entry->country;
            }

            // Put this entry in the hashtable
            UChar *key = allocUStringInTable(zoneUCharsLen);
            if (key == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                deleteCanonicalMapEntry(newEntry);
                goto error_cleanup;
            }
            u_strncpy(key, zoneUChars, zoneUCharsLen);
            uhash_put(canonicalMap, key, newEntry, &status);
            if (U_FAILURE(status)) {
                goto error_cleanup;
            }
        }
    }

normal_cleanup:
    ures_close(aliases);
    ures_close(tzitem);
    ures_close(zoneFormatting);
    delete tzenum;
    return canonicalMap;

error_cleanup:
    if (canonicalMap != NULL) {
        uhash_close(canonicalMap);
        canonicalMap = NULL;
    }
    goto normal_cleanup;
}
Exemplo n.º 12
0
void DataDrivenFormatTest::testConvertDate(TestData *testData,
        const DataMap * /* settings */, UBool fmt) {
    UnicodeString kPATTERN("PATTERN="); // TODO: static
    UnicodeString kMILLIS("MILLIS="); // TODO: static
    UnicodeString kRELATIVE_MILLIS("RELATIVE_MILLIS="); // TODO: static
    UnicodeString kRELATIVE_ADD("RELATIVE_ADD:"); // TODO: static

    UErrorCode status = U_ZERO_ERROR;
    SimpleDateFormat basicFmt(UnicodeString("EEE MMM dd yyyy / YYYY'-W'ww-ee"),
                              status);
    if (U_FAILURE(status)) {
        dataerrln("FAIL: Couldn't create basic SimpleDateFormat: %s",
                  u_errorName(status));
        return;
    }

    const DataMap *currentCase= NULL;
    // Start the processing
    int n = 0;
    while (testData->nextCase(currentCase, status)) {
        char calLoc[256] = "";
        DateTimeStyleSet styleSet;
        UnicodeString pattern;
        UBool usePattern = FALSE;
        (void)usePattern;   // Suppress unused warning.
        CalendarFieldsSet fromSet;
        UDate fromDate = 0;
        UBool useDate = FALSE;

        UDate now = Calendar::getNow();

        ++n;

        char theCase[200];
        sprintf(theCase, "case %d:", n);
        UnicodeString caseString(theCase, "");

        // load params
        UnicodeString locale = currentCase->getString("locale", status);
        if (U_FAILURE(status)) {
            errln("case %d: No 'locale' line.", n);
            continue;
        }
        UnicodeString zone = currentCase->getString("zone", status);
        if (U_FAILURE(status)) {
            errln("case %d: No 'zone' line.", n);
            continue;
        }
        UnicodeString spec = currentCase->getString("spec", status);
        if(U_FAILURE(status)) {
            errln("case %d: No 'spec' line.", n);
            continue;
        }
        UnicodeString date = currentCase->getString("date", status);
        if(U_FAILURE(status)) {
            errln("case %d: No 'date' line.", n);
            continue;
        }
        UnicodeString expectStr= currentCase->getString("str", status);
        if(U_FAILURE(status)) {
            errln("case %d: No 'str' line.", n);
            continue;
        }

        DateFormat *format = NULL;

        // Process: 'locale'
        locale.extract(0, locale.length(), calLoc, (const char*)0); // default codepage.  Invariant codepage doesn't have '@'!
        Locale loc(calLoc);
        if(spec.startsWith(kPATTERN)) {
            pattern = UnicodeString(spec,kPATTERN.length());
            usePattern = TRUE;
            format = new SimpleDateFormat(pattern, loc, status);
            if(U_FAILURE(status)) {
                errln("case %d: could not create SimpleDateFormat from pattern: %s", n, u_errorName(status));
                continue;
            }
        } else {
            if(styleSet.parseFrom(spec, status)<0 || U_FAILURE(status)) {
                errln("case %d: could not parse spec as style fields: %s", n, u_errorName(status));
                continue;
            }
            format = DateFormat::createDateTimeInstance((DateFormat::EStyle)styleSet.getDateStyle(), (DateFormat::EStyle)styleSet.getTimeStyle(), loc);
            if(format == NULL ) {
                errln("case %d: could not create SimpleDateFormat from styles.", n);
                continue;
            }
        }

        Calendar *cal = Calendar::createInstance(loc, status);
        if(U_FAILURE(status)) {
            errln("case %d: could not create calendar from %s", n, calLoc);
        }

        if (zone.length() > 0) {
            TimeZone * tz = TimeZone::createTimeZone(zone);
            cal->setTimeZone(*tz);
            format->setTimeZone(*tz);
            delete tz;
        }

        // parse 'date'
        if(date.startsWith(kMILLIS)) {
            UnicodeString millis = UnicodeString(date, kMILLIS.length());
            useDate = TRUE;
            fromDate = udbg_stod(millis);
        } else if(date.startsWith(kRELATIVE_MILLIS)) {
            UnicodeString millis = UnicodeString(date, kRELATIVE_MILLIS.length());
            useDate = TRUE;
            fromDate = udbg_stod(millis) + now;
        } else if(date.startsWith(kRELATIVE_ADD)) {
            UnicodeString add = UnicodeString(date, kRELATIVE_ADD.length());  // "add" is a string indicating which fields to add
            if(fromSet.parseFrom(add, status)<0 || U_FAILURE(status)) {
                errln("case %d: could not parse date as RELATIVE_ADD calendar fields: %s", n, u_errorName(status));
                continue;
            }
            useDate=TRUE;
            cal->clear();
            cal->setTime(now, status);
            for (int q=0; q<UCAL_FIELD_COUNT; q++) {
                if (fromSet.isSet((UCalendarDateFields)q)) {
                    //int32_t oldv = cal->get((UCalendarDateFields)q, status);
                    if (q == UCAL_DATE) {
                        cal->add((UCalendarDateFields)q,
                                 fromSet.get((UCalendarDateFields)q), status);
                    } else {
                        cal->set((UCalendarDateFields)q,
                                 fromSet.get((UCalendarDateFields)q));
                    }
                    //int32_t newv = cal->get((UCalendarDateFields)q, status);
                }
            }
            fromDate = cal->getTime(status);
            if(U_FAILURE(status)) {
                errln("case %d: could not apply date as RELATIVE_ADD calendar fields: %s", n, u_errorName(status));
                continue;
            }
        } else if(fromSet.parseFrom(date, status)<0 || U_FAILURE(status)) {
            errln("case %d: could not parse date as calendar fields: %s", n, u_errorName(status));
            continue;
        }

        // now, do it.
        if (fmt) {
            FieldPosition pos;
//            logln((UnicodeString)"#"+n+" "+locale+"/"+from+" >>> "+toCalLoc+"/"
//                    +to);
            cal->clear();
            UnicodeString output;
            output.remove();

            if(useDate) {
//                cal->setTime(fromDate, status);
//                if(U_FAILURE(status)) {
//                    errln("case %d: could not set time on calendar: %s", n, u_errorName(status));
//                    continue;
//                }
                format->format(fromDate, output, pos, status);
            } else {
                fromSet.setOnCalendar(cal, status);
                if(U_FAILURE(status)) {
                    errln("case %d: could not set fields on calendar: %s", n, u_errorName(status));
                    continue;
                }
                format->format(*cal, output, pos);
            }

            // check erro result from 'format'
            if(U_FAILURE(status)) {
                errln("case %d: could not format(): %s", n, u_errorName(status)); // TODO: use 'pos'
            }
//            if(pos.getBeginIndex()==0 && pos.getEndIndex()==0) { // TODO: more precise error?
//                errln("WARNING: case %d: format's pos returned (0,0) - error ??", n);
//            }

            if(output == expectStr) {
                logln(caseString+": format: SUCCESS! "+UnicodeString("expect=output=")+output);
            } else {
                UnicodeString result;
                UnicodeString result2;
                errln(caseString+": format:  output!=expectStr, got " + *udbg_escape(output, &result) + " expected " + *udbg_escape(expectStr, &result2));
            }
        } else {
            cal->clear();
            ParsePosition pos;
            format->parse(expectStr,*cal,pos);
            if(useDate) {
                UDate gotDate = cal->getTime(status);
                if(U_FAILURE(status)) {
                    errln(caseString+": parse: could not get time on calendar: "+UnicodeString(u_errorName(status)));
                    continue;
                }
                if(gotDate == fromDate) {
                    logln(caseString+": parse: SUCCESS! "+UnicodeString("gotDate=parseDate=")+expectStr);
                } else {
                    UnicodeString expectDateStr, gotDateStr;
                    basicFmt.format(fromDate,expectDateStr);
                    basicFmt.format(gotDate,gotDateStr);
                    errln(caseString+": parse: FAIL. parsed '"+expectStr+"' and got "+gotDateStr+", expected " + expectDateStr);
                }
            } else {
//                Calendar *cal2 = cal->clone();
//                cal2->clear();
//                fromSet.setOnCalendar(cal2, status);
                if(U_FAILURE(status)) {
                    errln("case %d: parse: could not set fields on calendar: %s", n, u_errorName(status));
                    continue;
                }

                CalendarFieldsSet diffSet;
//                diffSet.clear();
                if (!fromSet.matches(cal, diffSet, status)) {
                    UnicodeString diffs = diffSet.diffFrom(fromSet, status);
                    errln((UnicodeString)"FAIL: "+caseString
                          +", Differences: '"+ diffs
                          +"', status: "+ u_errorName(status));
                } else if (U_FAILURE(status)) {
                    errln("FAIL: "+caseString+" parse SET SOURCE calendar Failed to match: "
                          +u_errorName(status));
                } else {
                    logln("PASS: "******" parse.");
                }



            }
        }
        delete cal;
        delete format;

    }
//    delete basicFmt;
}
Exemplo n.º 13
0
void QuantityFormatterTest::TestBasic() {
    UErrorCode status = U_ZERO_ERROR;
#if !UCONFIG_NO_FORMATTING
    QuantityFormatter fmt;
    assertFalse(
            "adding bad variant",
            fmt.addIfAbsent("a bad variant", "{0} pounds", status));
    assertEquals("adding bad variant status", (int32_t)U_ILLEGAL_ARGUMENT_ERROR, status);
    status = U_ZERO_ERROR;
    assertFalse(
            "Adding bad pattern",
            fmt.addIfAbsent("other", "{0} {1} too many placeholders", status));
    assertEquals("adding bad pattern status", (int32_t)U_ILLEGAL_ARGUMENT_ERROR, status);
    status = U_ZERO_ERROR;
    assertFalse("isValid with no patterns", fmt.isValid());
    assertTrue(
            "Adding good pattern with no placeholders",
            fmt.addIfAbsent("zero", "no placeholder", status));
    assertTrue(
            "Adding good pattern",
            fmt.addIfAbsent("other", "{0} pounds", status));
    assertTrue("isValid with other", fmt.isValid());
    assertTrue(
            "Adding good pattern",
            fmt.addIfAbsent("one", "{0} pound", status));

    assertEquals(
            "getByVariant",
            fmt.getByVariant("bad variant")->getTextWithNoArguments(),
            " pounds");
    assertEquals(
            "getByVariant",
            fmt.getByVariant("other")->getTextWithNoArguments(),
            " pounds");
    assertEquals(
            "getByVariant",
            fmt.getByVariant("one")->getTextWithNoArguments(),
            " pound");
    assertEquals(
            "getByVariant",
            fmt.getByVariant("few")->getTextWithNoArguments(),
            " pounds");

    // Test copy constructor
    {
        QuantityFormatter copied(fmt);
        assertEquals(
                "copied getByVariant",
                copied.getByVariant("other")->getTextWithNoArguments(),
                " pounds");
        assertEquals(
                "copied getByVariant",
                copied.getByVariant("one")->getTextWithNoArguments(),
                " pound");
        assertEquals(
                "copied getByVariant",
                copied.getByVariant("few")->getTextWithNoArguments(),
                " pounds");
    }
        
    // Test assignment
    {
        QuantityFormatter assigned;
        assigned = fmt;
        assertEquals(
                "assigned getByVariant",
                assigned.getByVariant("other")->getTextWithNoArguments(),
                " pounds");
        assertEquals(
                "assigned getByVariant",
                assigned.getByVariant("one")->getTextWithNoArguments(),
                " pound");
        assertEquals(
                "assigned getByVariant",
                assigned.getByVariant("few")->getTextWithNoArguments(),
                " pounds");
    }

    // Test format.
    {
        LocalPointer<NumberFormat> numfmt(
                NumberFormat::createInstance(Locale::getEnglish(), status));
        LocalPointer<PluralRules> plurrule(
                PluralRules::forLocale("en", status));
        FieldPosition pos(FieldPosition::DONT_CARE);
        UnicodeString appendTo;
        assertEquals(
                "format singular",
                UnicodeString("1 pound"),
                fmt.format(
                        1.0,
                        *numfmt,
                        *plurrule,
                        appendTo,
                        pos,
                        status), TRUE);
        appendTo.remove();
        assertEquals(
                "format plural",
                UnicodeString("2 pounds"),
                fmt.format(
                        2.0,
                        *numfmt,
                        *plurrule,
                        appendTo,
                        pos,
                        status), TRUE);
    }
    fmt.reset();
    assertFalse("isValid after reset", fmt.isValid());
#endif
    assertSuccess("", status);
}
int main(int argc, const char *argv[]) {
    UErrorCode errorCode = U_ZERO_ERROR;

    // Get the unsafeBackwardsSet
    const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
    if(U_FAILURE(errorCode)) {
      fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode));
      return 1;
    }
    const UVersionInfo &version = rootEntry->tailoring->version;
    const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet;
    char verString[20];
    u_versionToString(version, verString);
    fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString);
    int32_t rangeCount = unsafeBackwardSet->getRangeCount();
    
#if SERIALIZE
    fprintf(stderr, ".. serializing\n");
    // UnicodeSet serialization
    
    UErrorCode preflightCode = U_ZERO_ERROR;
    // preflight
    int32_t serializedCount = unsafeBackwardSet->serialize(NULL,0,preflightCode);
    if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) {
      fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode));
      return 1;
    }
    uint16_t *serializedData = new uint16_t[serializedCount];
    // serialize
    unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode);
    if(U_FAILURE(errorCode)) {
      delete [] serializedData;
      fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode));
      return 1;
    }
#endif
    
#if PATTERN
    fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n");
    // attempt to use pattern
    
    UnicodeString pattern;
    UnicodeSet set(*unsafeBackwardSet);
    set.compact();
    set.toPattern(pattern, FALSE);

    if(U_SUCCESS(errorCode)) {
      // This fails (bug# ?) - which is why this method was abandoned.
      
      // UnicodeSet usA(pattern, errorCode);
      // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode));
      // return 1;
    }


    const UChar *buf = pattern.getBuffer();
    int32_t needed = pattern.length();

    // print
    {
      char buf2[2048];
      int32_t len2 = pattern.extract(0, pattern.length(), buf2, "utf-8");
      buf2[len2]=0;
      fprintf(stderr,"===\n%s\n===\n", buf2);
    }

    const UnicodeString unsafeBackwardPattern(FALSE, buf, needed);
  if(U_SUCCESS(errorCode)) {
    //UnicodeSet us(unsafeBackwardPattern, errorCode);
    //    fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode));
  } else {
    fprintf(stderr, "Uset OK - \n");
  }
#endif


  // Generate the output file.

  printf("// collunsafe.h\n");
  printf("// %s\n", U_COPYRIGHT_STRING);
  printf("\n");
  printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n");
  printf("// Machine generated, do not edit.\n");
  printf("\n");
  printf("#ifndef COLLUNSAFE_H\n"
         "#define COLLUNSAFE_H\n"
         "\n"
         "#include \"unicode/utypes.h\"\n"
         "\n"
         "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n");
  printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString);

  
  
#if PATTERN
  printf("#define COLLUNSAFE_PATTERN 1\n");
  printf("static const int32_t collunsafe_len = %d;\n", needed);
  printf("static const UChar collunsafe_pattern[collunsafe_len] = {\n");
  for(int i=0;i<needed;i++) {
    if( (i>0) && (i%8 == 0) ) {
      printf(" // %d\n", i);
    }
    printf("0x%04X", buf[i]); // TODO check
    if(i != (needed-1)) {
      printf(", ");
    }
    }
  printf(" //%d\n};\n", (needed-1));
#endif

#if RANGE
    fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n");
    printf("#define COLLUNSAFE_RANGE 1\n");
    printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount);
    printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2);
    for(int32_t i=0;i<rangeCount;i++) {
      printf(" 0x%04X, 0x%04X, // %d\n",
             unsafeBackwardSet->getRangeStart(i),
             unsafeBackwardSet->getRangeEnd(i),
             i);
    }
    printf("};\n");
#endif

#if SERIALIZE
    printf("#define COLLUNSAFE_SERIALIZE 1\n");    
    printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount);
    printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount);
    for(int32_t i=0;i<serializedCount;i++) {
      if( (i>0) && (i%8 == 0) ) {
        printf(" // %d\n", i);
      }
      printf("0x%04X", serializedData[i]); // TODO check
      if(i != (serializedCount-1)) {
        printf(", ");
      }
    }  
    printf("};\n");
#endif
    
    printf("#endif\n");
    fflush(stderr);
    fflush(stdout);
    return(U_SUCCESS(errorCode)?0:1);
}
std::string GlobalizationNDK::numberToString(const std::string& args)
{
    if (args.empty()) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::numberToString: no arguments provided!");
        return errorInJson(UNKNOWN_ERROR, "No arguments provided!");
    }

    Json::Reader reader;
    Json::Value root;
    bool parse = reader.parse(args, root);

    if (!parse) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::numberToString: invalid json data: %s",
                args.c_str());
        return errorInJson(PARSING_ERROR, "Invalid json data!");
    }

    Json::Value nv = root["number"];
    if (nv.isNull()) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::numberToString: no number provided!");
        return errorInJson(FORMATTING_ERROR, "No number provided!");
    }

    if (!nv.isNumeric()) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::numberToString: invalid number type: %d!",
                nv.type());
        return errorInJson(FORMATTING_ERROR, "Invalid number type!");
    }

    // This is the default value when no options provided.
    ENumberType type = kNumberDecimal;

    Json::Value options = root["options"];
    std::string error;
    if (!handleNumberOptions(options, type, error))
        return errorInJson(PARSING_ERROR, error);

    UErrorCode status = U_ZERO_ERROR;
    NumberFormat* nf;
    switch (type) {
    case kNumberDecimal:
    default:
        nf = NumberFormat::createInstance(status);
        break;
    case kNumberCurrency:
        nf = NumberFormat::createCurrencyInstance(status);
        break;
    case kNumberPercent:
        nf = NumberFormat::createPercentInstance(status);
        break;
    }

    if (!nf) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::numberToString: failed to create NumberFormat instance for type %d: %d",
                status, type);
        return errorInJson(UNKNOWN_ERROR, "Failed to create NumberFormat instance!");
    }
    std::auto_ptr<NumberFormat> deleter(nf);

    UnicodeString result;
    nf->format(nv.asDouble(), result);
    std::string utf8;
    result.toUTF8String(utf8);

    return resultInJson(utf8);
}
Exemplo n.º 16
0
//------------------------------------------------------------------------------
//
//   findSetFor    given a UnicodeString,
//                  - find the corresponding Unicode Set  (uset node)
//                         (create one if necessary)
//                  - Set fLeftChild of the caller's node (should be a setRef node)
//                         to the uset node
//                 Maintain a hash table of uset nodes, so the same one is always used
//                    for the same string.
//                 If a "to adopt" set is provided and we haven't seen this key before,
//                    add the provided set to the hash table.
//                 If the string is one (32 bit) char in length, the set contains
//                    just one element which is the char in question.
//                 If the string is "any", return a set containing all chars.
//
//------------------------------------------------------------------------------
void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt) {

    RBBISetTableEl   *el;

    // First check whether we've already cached a set for this string.
    // If so, just use the cached set in the new node.
    //   delete any set provided by the caller, since we own it.
    el = (RBBISetTableEl *)uhash_get(fSetTable, &s);
    if (el != NULL) {
        delete setToAdopt;
        node->fLeftChild = el->val;
        U_ASSERT(node->fLeftChild->fType == RBBINode::uset);
        return;
    }

    // Haven't seen this set before.
    // If the caller didn't provide us with a prebuilt set,
    //   create a new UnicodeSet now.
    if (setToAdopt == NULL) {
        if (s.compare(kAny, -1) == 0) {
            setToAdopt = new UnicodeSet(0x000000, 0x10ffff);
        } else {
            UChar32 c;
            c = s.char32At(0);
            setToAdopt = new UnicodeSet(c, c);
        }
    }

    //
    // Make a new uset node to refer to this UnicodeSet
    // This new uset node becomes the child of the caller's setReference node.
    //
    RBBINode *usetNode    = new RBBINode(RBBINode::uset);
    if (usetNode == NULL) {
        error(U_MEMORY_ALLOCATION_ERROR);
        return;
    }
    usetNode->fInputSet   = setToAdopt;
    usetNode->fParent     = node;
    node->fLeftChild      = usetNode;
    usetNode->fText = s;


    //
    // Add the new uset node to the list of all uset nodes.
    //
    fRB->fUSetNodes->addElement(usetNode, *fRB->fStatus);


    //
    // Add the new set to the set hash table.
    //
    el      = (RBBISetTableEl *)uprv_malloc(sizeof(RBBISetTableEl));
    UnicodeString *tkey = new UnicodeString(s);
    if (tkey == NULL || el == NULL || setToAdopt == NULL) {
        // Delete to avoid memory leak
        delete tkey;
        tkey = NULL;
        uprv_free(el);
        el = NULL;
        delete setToAdopt;
        setToAdopt = NULL;

        error(U_MEMORY_ALLOCATION_ERROR);
        return;
    }
    el->key = tkey;
    el->val = usetNode;
    uhash_put(fSetTable, el->key, el, fRB->fStatus);

    return;
}
std::string GlobalizationNDK::getNumberPattern(const std::string& args)
{
    // This is the default value when no options provided.
    ENumberType type = kNumberDecimal;

    if (!args.empty()) {
        Json::Reader reader;
        Json::Value root;
        bool parse = reader.parse(args, root);

        if (!parse) {
            slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getNumberPattern: invalid json data: %s",
                    args.c_str());
            return errorInJson(PARSING_ERROR, "Invalid json data!");
        }

        Json::Value options = root["options"];
        std::string error;
        if (!handleNumberOptions(options, type, error))
            return errorInJson(PARSING_ERROR, error);
    }

    std::string pattern, symbol, positive, negative, decimal, grouping;
    int fraction;
    double rounding;

    UErrorCode status = U_ZERO_ERROR;
    NumberFormat* nf;
    switch (type) {
    case kNumberDecimal:
    default:
        nf = NumberFormat::createInstance(status);
        break;
    case kNumberCurrency:
        nf = NumberFormat::createCurrencyInstance(status);
        break;
    case kNumberPercent:
        nf = NumberFormat::createPercentInstance(status);
        break;
    }

    if (!nf) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getNumberPattern: failed to create NumberFormat instance for type %d: %d",
                status, type);
        return errorInJson(UNKNOWN_ERROR, "Failed to create NumberFormat instance!");
    }
    std::auto_ptr<NumberFormat> deleter(nf);

    if (nf->getDynamicClassID() != DecimalFormat::getStaticClassID()) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getNumberPattern: DecimalFormat expected: %p != %p",
                nf->getDynamicClassID(), DecimalFormat::getStaticClassID());
        return errorInJson(UNKNOWN_ERROR, "DecimalFormat expected!");
    }

    DecimalFormat* df = (DecimalFormat*) nf;
    const DecimalFormatSymbols* dfs = df->getDecimalFormatSymbols();
    if (!dfs) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getNumberPattern: unable to get DecimalFormatSymbols!");
        return errorInJson(UNKNOWN_ERROR, "Failed to get DecimalFormatSymbols instance!");
    }

    UnicodeString ucs;

    df->toPattern(ucs);
    ucs.toUTF8String(pattern);
    ucs.remove();

    df->getPositivePrefix(ucs);
    if (ucs.isEmpty())
        df->getPositiveSuffix(ucs);
    ucs.toUTF8String(positive);
    ucs.remove();

    df->getNegativePrefix(ucs);
    if (ucs.isEmpty())
        df->getNegativeSuffix(ucs);
    ucs.toUTF8String(negative);
    ucs.remove();

    rounding = df->getRoundingIncrement();
    fraction = df->getMaximumFractionDigits();

    ucs = dfs->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
    ucs.toUTF8String(decimal);
    ucs.remove();

    ucs = dfs->getSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
    ucs.toUTF8String(grouping);
    ucs.remove();

    if (type == kNumberPercent)
        ucs = dfs->getSymbol(DecimalFormatSymbols::kPercentSymbol);
    else if (type == kNumberCurrency)
        ucs = dfs->getSymbol(DecimalFormatSymbols::kCurrencySymbol);
    else
        ucs = dfs->getSymbol(DecimalFormatSymbols::kDigitSymbol);

    ucs.toUTF8String(symbol);
    ucs.remove();

    return resultInJson(pattern, symbol, fraction, rounding, positive, negative, decimal, grouping);
}
Exemplo n.º 18
0
/* {{{ timezone_convert_to_datetimezone
 *	   Convert from TimeZone to DateTimeZone object */
U_CFUNC zval *timezone_convert_to_datetimezone(const TimeZone *timeZone,
											   intl_error *outside_error,
											   const char *func, zval *ret)
{
	UnicodeString		id;
	char				*message = NULL;
	php_timezone_obj	*tzobj;
	zval				arg;

	timeZone->getID(id);
	if (id.isBogus()) {
		spprintf(&message, 0, "%s: could not obtain TimeZone id", func);
		intl_errors_set(outside_error, U_ILLEGAL_ARGUMENT_ERROR,
			message, 1);
		goto error;
	}

	object_init_ex(ret, php_date_get_timezone_ce());
	tzobj = Z_PHPTIMEZONE_P(ret);

	if (id.compare(0, 3, UnicodeString("GMT", sizeof("GMT")-1, US_INV)) == 0) {
		/* The DateTimeZone constructor doesn't support offset time zones,
		 * so we must mess with DateTimeZone structure ourselves */
		tzobj->initialized	  = 1;
		tzobj->type			  = TIMELIB_ZONETYPE_OFFSET;
		//convert offset from milliseconds to seconds
		tzobj->tzi.utc_offset = timeZone->getRawOffset() / 1000;
	} else {
		zend_string *u8str;
		/* Call the constructor! */
		u8str = intl_charFromString(id, &INTL_ERROR_CODE(*outside_error));
		if (!u8str) {
			spprintf(&message, 0, "%s: could not convert id to UTF-8", func);
			intl_errors_set(outside_error, INTL_ERROR_CODE(*outside_error),
				message, 1);
			goto error;
		}
		ZVAL_STR(&arg, u8str);
		zend_call_method_with_1_params(Z_OBJ_P(ret), NULL, &Z_OBJCE_P(ret)->constructor, "__construct", NULL, &arg);
		if (EG(exception)) {
			spprintf(&message, 0,
				"%s: DateTimeZone constructor threw exception", func);
			intl_errors_set(outside_error, U_ILLEGAL_ARGUMENT_ERROR,
				message, 1);
			zend_object_store_ctor_failed(Z_OBJ_P(ret));
			zval_ptr_dtor(&arg);
			goto error;
		}
		zval_ptr_dtor(&arg);
	}

	if (0) {
error:
		if (ret) {
			zval_ptr_dtor(ret);
		}
		ret = NULL;
	}

	if (message) {
		efree(message);
	}
	return ret;
}
Exemplo n.º 19
0
static inline UBool
isNameOpen(const UnicodeString &pattern, int32_t pos) {
    return pattern.charAt(pos)==BACKSLASH && pattern.charAt(pos+1)==UPPER_N;
}
Exemplo n.º 20
0
void
CalendarLimitTest::doLimitsTest(Calendar& cal,
                                const int32_t* fieldsToTest,
                                UDate startDate,
                                int32_t testDuration) {
    static const int32_t FIELDS[] = {
        UCAL_ERA,
        UCAL_YEAR,
        UCAL_MONTH,
        UCAL_WEEK_OF_YEAR,
        UCAL_WEEK_OF_MONTH,
        UCAL_DAY_OF_MONTH,
        UCAL_DAY_OF_YEAR,
        UCAL_DAY_OF_WEEK_IN_MONTH,
        UCAL_YEAR_WOY,
        UCAL_EXTENDED_YEAR,
        -1,
    };

    static const char* FIELD_NAME[] = {
        "ERA", "YEAR", "MONTH", "WEEK_OF_YEAR", "WEEK_OF_MONTH",
        "DAY_OF_MONTH", "DAY_OF_YEAR", "DAY_OF_WEEK",
        "DAY_OF_WEEK_IN_MONTH", "AM_PM", "HOUR", "HOUR_OF_DAY",
        "MINUTE", "SECOND", "MILLISECOND", "ZONE_OFFSET",
        "DST_OFFSET", "YEAR_WOY", "DOW_LOCAL", "EXTENDED_YEAR",
        "JULIAN_DAY", "MILLISECONDS_IN_DAY",
        "IS_LEAP_MONTH"
    };

    UErrorCode status = U_ZERO_ERROR;
    int32_t i, j;
    UnicodeString ymd;

    GregorianCalendar greg(status);
    if (failure(status, "new GregorianCalendar")) {
        return;
    }
    greg.setTime(startDate, status);
    if (failure(status, "GregorianCalendar::setTime")) {
        return;
    }
    logln((UnicodeString)"Start: " + startDate);

    if (fieldsToTest == NULL) {
        fieldsToTest = FIELDS;
    }


    // Keep a record of minima and maxima that we actually see.
    // These are kept in an array of arrays of hashes.
    int32_t limits[UCAL_FIELD_COUNT][4];
    for (j = 0; j < UCAL_FIELD_COUNT; j++) {
        limits[j][0] = INT32_MAX;
        limits[j][1] = INT32_MIN;
        limits[j][2] = INT32_MAX;
        limits[j][3] = INT32_MIN;
    }

    // This test can run for a long time; show progress.
    UDate millis = ucal_getNow();
    UDate mark = millis + 5000; // 5 sec
    millis -= testDuration * 1000; // stop time if testDuration<0

    for (i = 0;
         testDuration > 0 ? i < testDuration
                        : ucal_getNow() < millis;
         ++i) {
        if (ucal_getNow() >= mark) {
            logln((UnicodeString)"(" + i + " days)");
            mark += 5000; // 5 sec
        }
        cal.setTime(greg.getTime(status), status);
        cal.setMinimalDaysInFirstWeek(1);
        if (failure(status, "Calendar set/getTime")) {
            return;
        }
        for (j = 0; fieldsToTest[j] >= 0; ++j) {
            UCalendarDateFields f = (UCalendarDateFields)fieldsToTest[j];
            int32_t v = cal.get(f, status);
            int32_t minActual = cal.getActualMinimum(f, status);
            int32_t maxActual = cal.getActualMaximum(f, status);
            int32_t minLow = cal.getMinimum(f);
            int32_t minHigh = cal.getGreatestMinimum(f);
            int32_t maxLow = cal.getLeastMaximum(f);
            int32_t maxHigh = cal.getMaximum(f);

            if (limits[j][0] > minActual) {
                // the minimum
                limits[j][0] = minActual;
            }
            if (limits[j][1] < minActual) {
                // the greatest minimum
                limits[j][1] = minActual;
            }
            if (limits[j][2] > maxActual) {
                // the least maximum
                limits[j][2] = maxActual;
            }
            if (limits[j][3] < maxActual) {
                // the maximum
                limits[j][3] = maxActual;
            }

            if (minActual < minLow || minActual > minHigh) {
                errln((UnicodeString)"Fail: [" + cal.getType() + "] " +
                      ymdToString(cal, ymd) +
                      " Range for min of " + FIELD_NAME[f] + "(" + f +
                      ")=" + minLow + ".." + minHigh +
                      ", actual_min=" + minActual);
            }
            if (maxActual < maxLow || maxActual > maxHigh) {
                errln((UnicodeString)"Fail: [" + cal.getType() + "] " +
                      ymdToString(cal, ymd) +
                      " Range for max of " + FIELD_NAME[f] + "(" + f +
                      ")=" + maxLow + ".." + maxHigh +
                      ", actual_max=" + maxActual);
            }
            if (v < minActual || v > maxActual) {
                errln((UnicodeString)"Fail: [" + cal.getType() + "] " +
                      ymdToString(cal, ymd) +
                      " " + FIELD_NAME[f] + "(" + f + ")=" + v +
                      ", actual range=" + minActual + ".." + maxActual +
                      ", allowed=(" + minLow + ".." + minHigh + ")..(" +
                      maxLow + ".." + maxHigh + ")");
            }
        }
        greg.add(UCAL_DAY_OF_YEAR, 1, status);
        if (failure(status, "Calendar::add")) {
            return;
        }
    }

    // Check actual maxima and minima seen against ranges returned
    // by API.
    UnicodeString buf;
    for (j = 0; fieldsToTest[j] >= 0; ++j) {
        int32_t rangeLow, rangeHigh;
        UBool fullRangeSeen = TRUE;
        UCalendarDateFields f = (UCalendarDateFields)fieldsToTest[j];

        buf.remove();
        buf.append((UnicodeString)"[" + cal.getType() + "] " + FIELD_NAME[f]);

        // Minumum
        rangeLow = cal.getMinimum(f);
        rangeHigh = cal.getGreatestMinimum(f);
        if (limits[j][0] != rangeLow || limits[j][1] != rangeHigh) {
            fullRangeSeen = FALSE;
        }
        buf.append((UnicodeString)" minima range=" + rangeLow + ".." + rangeHigh);
        buf.append((UnicodeString)" minima actual=" + limits[j][0] + ".." + limits[j][1]);

        // Maximum
        rangeLow = cal.getLeastMaximum(f);
        rangeHigh = cal.getMaximum(f);
        if (limits[j][2] != rangeLow || limits[j][3] != rangeHigh) {
            fullRangeSeen = FALSE;
        }
        buf.append((UnicodeString)" maxima range=" + rangeLow + ".." + rangeHigh);
        buf.append((UnicodeString)" maxima actual=" + limits[j][2] + ".." + limits[j][3]);

        if (fullRangeSeen) {
            logln((UnicodeString)"OK: " + buf);
        } else {
            // This may or may not be an error -- if the range of dates
            // we scan over doesn't happen to contain a minimum or
            // maximum, it doesn't mean some other range won't.
            logln((UnicodeString)"Warning: " + buf);
        }
    }

    logln((UnicodeString)"End: " + greg.getTime(status));
}
Exemplo n.º 21
0
/**
 * Return true if the given position, in the given pattern, appears
 * to be the start of a UnicodeSet pattern.
 */
UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) {
    return ((pos+1) < pattern.length() &&
            pattern.charAt(pos) == (UChar)91/*[*/) ||
           resemblesPropertyPattern(pattern, pos);
}
Exemplo n.º 22
0
int main(int argc, char **argv) {

    Calendar *cal;
    TimeZone *zone;
    DateFormat *fmt;
    UErrorCode status = U_ZERO_ERROR;
    UnicodeString str;
    UDate date;

    // The languages in which we will display the date
    static char* LANGUAGE[] = {
        "en", "de", "fr"
    };
    static const int32_t N_LANGUAGE = sizeof(LANGUAGE)/sizeof(LANGUAGE[0]);

    // The time zones in which we will display the time
    static char* TIMEZONE[] = {
        "America/Los_Angeles",
        "America/New_York",
        "Europe/Paris",
        "Europe/Berlin"
    };
    static const int32_t N_TIMEZONE = sizeof(TIMEZONE)/sizeof(TIMEZONE[0]);

    // Create a calendar
    cal = Calendar::createInstance(status);
    check(status, "Calendar::createInstance");
    zone = createZone("GMT"); // Create a GMT zone
    cal->adoptTimeZone(zone);
    cal->clear();
    cal->set(1999, Calendar::JUNE, 4);
    date = cal->getTime(status);
    check(status, "Calendar::getTime");

    for (int32_t i=0; i<N_LANGUAGE; ++i) {
        Locale loc(LANGUAGE[i]);

        // Create a formatter for DATE and TIME
        fmt = DateFormat::createDateTimeInstance(
                                DateFormat::kFull, DateFormat::kFull, loc);

        for (int32_t j=0; j<N_TIMEZONE; ++j) {

            cal->adoptTimeZone(createZone(TIMEZONE[j]));
            fmt->setCalendar(*cal);

            // Format the date
            str.remove();
            fmt->format(date, str, status);
            
            // Display the formatted date string
            printf("Date (%s, %s): ", LANGUAGE[i], TIMEZONE[j]);
            uprintf(escape(str));
            printf("\n\n");
        }

        delete fmt;
    }

    printf("Exiting successfully\n");
    return 0;
}
Exemplo n.º 23
0
void
CurrencyPluralInfo::setupCurrencyPluralPattern(const Locale& loc, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }

    if (fPluralCountToCurrencyUnitPattern) {
        deleteHash(fPluralCountToCurrencyUnitPattern);
    }
    fPluralCountToCurrencyUnitPattern = initHash(status);
    if (U_FAILURE(status)) {
        return;
    }

    NumberingSystem *ns = NumberingSystem::createInstance(loc,status);
    UErrorCode ec = U_ZERO_ERROR;
    UResourceBundle *rb = ures_open(NULL, loc.getName(), &ec);
    UResourceBundle *numElements = ures_getByKeyWithFallback(rb, gNumberElementsTag, NULL, &ec);
    rb = ures_getByKeyWithFallback(numElements, ns->getName(), rb, &ec);
    rb = ures_getByKeyWithFallback(rb, gPatternsTag, rb, &ec);
    int32_t ptnLen;
    const UChar* numberStylePattern = ures_getStringByKeyWithFallback(rb, gDecimalFormatTag, &ptnLen, &ec);
    // Fall back to "latn" if num sys specific pattern isn't there.
    if ( ec == U_MISSING_RESOURCE_ERROR && uprv_strcmp(ns->getName(),gLatnTag)) {
        ec = U_ZERO_ERROR;
        rb = ures_getByKeyWithFallback(numElements, gLatnTag, rb, &ec);
        rb = ures_getByKeyWithFallback(rb, gPatternsTag, rb, &ec);
        numberStylePattern = ures_getStringByKeyWithFallback(rb, gDecimalFormatTag, &ptnLen, &ec);
    }
    int32_t numberStylePatternLen = ptnLen;
    const UChar* negNumberStylePattern = NULL;
    int32_t negNumberStylePatternLen = 0;
    // TODO: Java
    // parse to check whether there is ";" separator in the numberStylePattern
    UBool hasSeparator = false;
    if (U_SUCCESS(ec)) {
        for (int32_t styleCharIndex = 0; styleCharIndex < ptnLen; ++styleCharIndex) {
            if (numberStylePattern[styleCharIndex] == gNumberPatternSeparator) {
                hasSeparator = true;
                // split the number style pattern into positive and negative
                negNumberStylePattern = numberStylePattern + styleCharIndex + 1;
                negNumberStylePatternLen = ptnLen - styleCharIndex - 1;
                numberStylePatternLen = styleCharIndex;
            }
        }
    }

    ures_close(numElements);
    ures_close(rb);
    delete ns;

    if (U_FAILURE(ec)) {
        return;
    }

    UResourceBundle *currRb = ures_open(U_ICUDATA_CURR, loc.getName(), &ec);
    UResourceBundle *currencyRes = ures_getByKeyWithFallback(currRb, gCurrUnitPtnTag, NULL, &ec);
    
#ifdef CURRENCY_PLURAL_INFO_DEBUG
    std::cout << "in set up\n";
#endif
    StringEnumeration* keywords = fPluralRules->getKeywords(ec);
    if (U_SUCCESS(ec)) {
        const char* pluralCount;
        while ((pluralCount = keywords->next(NULL, ec)) != NULL) {
            if ( U_SUCCESS(ec) ) {
                int32_t ptnLen;
                UErrorCode err = U_ZERO_ERROR;
                const UChar* patternChars = ures_getStringByKeyWithFallback(
                    currencyRes, pluralCount, &ptnLen, &err);
                if (U_SUCCESS(err) && ptnLen > 0) {
                    UnicodeString* pattern = new UnicodeString(patternChars, ptnLen);
#ifdef CURRENCY_PLURAL_INFO_DEBUG
                    char result_1[1000];
                    pattern->extract(0, pattern->length(), result_1, "UTF-8");
                    std::cout << "pluralCount: " << pluralCount << "; pattern: " << result_1 << "\n";
#endif
                    pattern->findAndReplace(UnicodeString(TRUE, gPart0, 3), 
                      UnicodeString(numberStylePattern, numberStylePatternLen));
                    pattern->findAndReplace(UnicodeString(TRUE, gPart1, 3), UnicodeString(TRUE, gTripleCurrencySign, 3));

                    if (hasSeparator) {
                        UnicodeString negPattern(patternChars, ptnLen);
                        negPattern.findAndReplace(UnicodeString(TRUE, gPart0, 3), 
                          UnicodeString(negNumberStylePattern, negNumberStylePatternLen));
                        negPattern.findAndReplace(UnicodeString(TRUE, gPart1, 3), UnicodeString(TRUE, gTripleCurrencySign, 3));
                        pattern->append(gNumberPatternSeparator);
                        pattern->append(negPattern);
                    }
#ifdef CURRENCY_PLURAL_INFO_DEBUG
                    pattern->extract(0, pattern->length(), result_1, "UTF-8");
                    std::cout << "pluralCount: " << pluralCount << "; pattern: " << result_1 << "\n";
#endif

                    fPluralCountToCurrencyUnitPattern->put(UnicodeString(pluralCount, -1, US_INV), pattern, status);
                }
            }
        }
    }
    delete keywords;
    ures_close(currencyRes);
    ures_close(currRb);
}
Exemplo n.º 24
0
static void appendRange(
        const UnicodeString &src,
        int32_t end,
        UnicodeString &dest) {
    dest.append(src, end, src.length() - end);
}
Exemplo n.º 25
0
NFSubstitution::NFSubstitution(int32_t _pos,
                               const NFRuleSet* _ruleSet,
                               const RuleBasedNumberFormat* formatter,
                               const UnicodeString& description,
                               UErrorCode& status)
                               : pos(_pos), ruleSet(NULL), numberFormat(NULL)
{
    // the description should begin and end with the same character.
    // If it doesn't that's a syntax error.  Otherwise,
    // makeSubstitution() was the only thing that needed to know
    // about these characters, so strip them off
    UnicodeString workingDescription(description);
    if (description.length() >= 2
        && description.charAt(0) == description.charAt(description.length() - 1))
    {
        workingDescription.remove(description.length() - 1, 1);
        workingDescription.remove(0, 1);
    }
    else if (description.length() != 0) {
        // throw new IllegalArgumentException("Illegal substitution syntax");
        status = U_PARSE_ERROR;
        return;
    }

    // if the description was just two paired token characters
    // (i.e., "<<" or ">>"), it uses the rule set it belongs to to
    // format its result
    if (workingDescription.length() == 0) {
        this->ruleSet = _ruleSet;
    }
    // if the description contains a rule set name, that's the rule
    // set we use to format the result: get a reference to the
    // names rule set
    else if (workingDescription.charAt(0) == gPercent) {
        this->ruleSet = formatter->findRuleSet(workingDescription, status);
    }
    // if the description begins with 0 or #, treat it as a
    // DecimalFormat pattern, and initialize a DecimalFormat with
    // that pattern (then set it to use the DecimalFormatSymbols
    // belonging to our formatter)
    else if (workingDescription.charAt(0) == gPound || workingDescription.charAt(0) ==gZero) {
        DecimalFormatSymbols* sym = formatter->getDecimalFormatSymbols();
        if (!sym) {
            status = U_MISSING_RESOURCE_ERROR;
            return;
        }
        this->numberFormat = new DecimalFormat(workingDescription, *sym, status);
        /* test for NULL */
        if (this->numberFormat == 0) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        if (U_FAILURE(status)) {
            delete (DecimalFormat*)this->numberFormat;
            this->numberFormat = NULL;
            return;
        }
        // this->numberFormat->setDecimalFormatSymbols(formatter->getDecimalFormatSymbols());
    }
    // if the description is ">>>", this substitution bypasses the
    // usual rule-search process and always uses the rule that precedes
    // it in its own rule set's rule list (this is used for place-value
    // notations: formats where you want to see a particular part of
    // a number even when it's 0)
    else if (workingDescription.charAt(0) == gGreaterThan) {
        // this causes problems when >>> is used in a frationalPartSubstitution
        // this->ruleSet = NULL;
        this->ruleSet = _ruleSet;
        this->numberFormat = NULL;
    }
    // and of the description is none of these things, it's a syntax error
    else {
        // throw new IllegalArgumentException("Illegal substitution syntax");
        status = U_PARSE_ERROR;
    }
}
std::string GlobalizationNDK::getCurrencyPattern(const std::string& args)
{
    if (args.empty()) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getCurrencyPattern: no arguments provided!");
        return errorInJson(UNKNOWN_ERROR, "No arguments provided!");
    }

    Json::Reader reader;
    Json::Value root;
    bool parse = reader.parse(args, root);

    if (!parse) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getCurrencyPattern: invalid json data: %s",
                args.c_str());
        return errorInJson(PARSING_ERROR, "Invalid json data!");
    }

    Json::Value ccv = root["currencyCode"];
    if (ccv.isNull()) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getCurrencyPattern: no currencyCode provided!");
        return errorInJson(FORMATTING_ERROR, "No currencyCode provided!");
    }

    if (!ccv.isString()) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getCurrencyPattern: invalid currencyCode type: %d!",
                ccv.type());
        return errorInJson(FORMATTING_ERROR, "Invalid currencyCode type!");
    }

    std::string cc = ccv.asString();
    if (cc.empty()) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getCurrencyPattern: empty currencyCode!");
        return errorInJson(FORMATTING_ERROR, "Empty currencyCode!");
    }

    UnicodeString ucc = UnicodeString::fromUTF8(cc);
    DecimalFormat* df = 0;
    int count = 0;
    const Locale* locs = Locale::getAvailableLocales(count);
    for (int i = 0; i < count; ++i) {
        UErrorCode status = U_ZERO_ERROR;
        NumberFormat* nf = NumberFormat::createCurrencyInstance(*(locs + i), status);
        if (!nf) {
            slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getCurrencyPattern: locale %d: unable to get NumberFormat instance!",
                    i);
            continue;
        }
        std::auto_ptr<NumberFormat> ndeleter(nf);

        const UChar* currency = nf->getCurrency();
        if (!currency) {
            slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getCurrencyPattern: locale %d: failed to getCurrency!",
                    i);
            continue;
        }

        if (!ucc.compare(currency, -1)) {
            df = (DecimalFormat*) ndeleter.release();
            break;
        }
    }

    if (!df)
        return errorInJson(UNKNOWN_ERROR, "Currency not supported!");

    std::auto_ptr<DecimalFormat> deleter(df);

    const DecimalFormatSymbols* dfs = df->getDecimalFormatSymbols();
    if (!dfs) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getCurrencyPattern: unable to get DecimalFormatSymbols!");
        return errorInJson(UNKNOWN_ERROR, "Failed to get DecimalFormatSymbols!");
    }

    UnicodeString ucs;

    std::string pattern;
    df->toPattern(ucs);
    ucs.toUTF8String(pattern);
    ucs.remove();

    int fraction = df->getMaximumFractionDigits();
    double rounding = df->getRoundingIncrement();

    std::string decimal;
    ucs = dfs->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
    ucs.toUTF8String(decimal);
    ucs.remove();

    std::string grouping;
    ucs = dfs->getSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
    ucs.toUTF8String(grouping);
    ucs.remove();

    return resultInJson(pattern, cc, fraction, rounding, decimal, grouping);
}
Exemplo n.º 27
0
Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID,
                                              TransliteratorParser& parser,
                                              TransliteratorAlias*& aliasReturn,
                                              UErrorCode& status) {
    U_ASSERT(aliasReturn == NULL);
    TransliteratorEntry *entry = find(ID);

    if (entry == 0) {
        // We get to this point if there are two threads, one of which
        // is instantiating an ID, and another of which is removing
        // the same ID from the registry, and the timing is just right.
        return 0;
    }

    // The usage model for the caller is that they will first call
    // reg->get() inside the mutex, they'll get back an alias, they call
    // alias->isRuleBased(), and if they get TRUE, they call alias->parse()
    // outside the mutex, then reg->reget() inside the mutex again.  A real
    // mess, but it gets things working for ICU 3.0. [alan].

    // Note: It's possible that in between the caller calling
    // alias->parse() and reg->reget(), that another thread will have
    // called reg->reget(), and the entry will already have been fixed up.
    // We have to detect this so we don't stomp over existing entry
    // data members and potentially leak memory (u.data and compoundFilter).

    if (entry->entryType == TransliteratorEntry::RULES_FORWARD ||
        entry->entryType == TransliteratorEntry::RULES_REVERSE ||
        entry->entryType == TransliteratorEntry::LOCALE_RULES) {
        
        if (parser.idBlockVector.isEmpty() && parser.dataVector.isEmpty()) {
            entry->u.data = 0;
            entry->entryType = TransliteratorEntry::ALIAS;
            entry->stringArg = UNICODE_STRING_SIMPLE("Any-NULL");
        }
        else if (parser.idBlockVector.isEmpty() && parser.dataVector.size() == 1) {
            entry->u.data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
            entry->entryType = TransliteratorEntry::RBT_DATA;
        }
        else if (parser.idBlockVector.size() == 1 && parser.dataVector.isEmpty()) {
            entry->stringArg = *(UnicodeString*)(parser.idBlockVector.elementAt(0));
            entry->compoundFilter = parser.orphanCompoundFilter();
            entry->entryType = TransliteratorEntry::ALIAS;
        }
        else {
            entry->entryType = TransliteratorEntry::COMPOUND_RBT;
            entry->compoundFilter = parser.orphanCompoundFilter();
            entry->u.dataVector = new UVector(status);
            entry->stringArg.remove();

            int32_t limit = parser.idBlockVector.size();
            if (parser.dataVector.size() > limit)
                limit = parser.dataVector.size();

            for (int32_t i = 0; i < limit; i++) {
                if (i < parser.idBlockVector.size()) {
                    UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i);
                    if (!idBlock->isEmpty())
                        entry->stringArg += *idBlock;
                }
                if (!parser.dataVector.isEmpty()) {
                    TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
                    entry->u.dataVector->addElement(data, status);
                    entry->stringArg += (UChar)0xffff;  // use U+FFFF to mark position of RBTs in ID block
                }
            }
        }
    }

    Transliterator *t =
        instantiateEntry(ID, entry, aliasReturn, status);
    return t;
}
Exemplo n.º 28
0
TransliteratorEntry* TransliteratorRegistry::findInBundle(const TransliteratorSpec& specToOpen,
                                            const TransliteratorSpec& specToFind,
                                            const UnicodeString& variant,
                                            UTransDirection direction)
{
    UnicodeString utag;
    UnicodeString resStr;
    int32_t pass;

    for (pass=0; pass<2; ++pass) {
        utag.truncate(0);
        // First try either TransliteratorTo_xxx or
        // TransliterateFrom_xxx, then try the bidirectional
        // Transliterate_xxx.  This precedence order is arbitrary
        // but must be consistent and documented.
        if (pass == 0) {
            utag.append(direction == UTRANS_FORWARD ?
                        TRANSLITERATE_TO : TRANSLITERATE_FROM);
        } else {
            utag.append(TRANSLITERATE);
        }
        UnicodeString s(specToFind.get());
        utag.append(s.toUpper(""));
        CharString tag(utag);
        
        UErrorCode status = U_ZERO_ERROR;
        ResourceBundle subres(specToOpen.getBundle().get(tag, status));
        if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) {
            continue;
        }
        
        s.truncate(0);
        if (specToOpen.get() != LocaleUtility::initNameFromLocale(subres.getLocale(), s)) {
            continue;
        }
        
        if (variant.length() != 0) {
            CharString var(variant);
            status = U_ZERO_ERROR;
            resStr = subres.getStringEx(var, status);
            if (U_SUCCESS(status)) {
                // Exit loop successfully
                break;
            }
        }
        
        else {
            // Variant is empty, which means match the first variant listed.
            status = U_ZERO_ERROR;
            resStr = subres.getStringEx(1, status);
            if (U_SUCCESS(status)) {
                // Exit loop successfully
                break;
            }
        }
    }

    if (pass==2) {
        // Failed
        return NULL;
    }

    // We have succeeded in loading a string from the locale
    // resources.  Create a new registry entry to hold it and return it.
    TransliteratorEntry *entry = new TransliteratorEntry();
    if (entry != 0) {
        // The direction is always forward for the
        // TransliterateTo_xxx and TransliterateFrom_xxx
        // items; those are unidirectional forward rules.
        // For the bidirectional Transliterate_xxx items,
        // the direction is the value passed in to this
        // function.
        int32_t dir = (pass == 0) ? UTRANS_FORWARD : direction;
        entry->entryType = TransliteratorEntry::LOCALE_RULES;
        entry->stringArg = resStr;
        entry->intArg = dir;
    }

    return entry;
}
Exemplo n.º 29
0
/**
 * Implements {@link Transliterator#handleTransliterate}.
 */
void TitlecaseTransliterator::handleTransliterate(
                                  Replaceable& text, UTransPosition& offsets,
                                  UBool isIncremental) const
{
    // TODO reimplement, see ustrcase.c
    // using a real word break iterator
    //   instead of just looking for a transition between cased and uncased characters
    // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
    // needs to take isIncremental into account because case mappings are context-sensitive
    //   also detect when lowercasing function did not finish because of context

    if (offsets.start >= offsets.limit) {
        return;
    }

    // case type: >0 cased (UCASE_LOWER etc.)  ==0 uncased  <0 case-ignorable
    int32_t type;

    // Our mode; we are either converting letter toTitle or
    // toLower.
    UBool doTitle = TRUE;

    // Determine if there is a preceding context of cased case-ignorable*,
    // in which case we want to start in toLower mode.  If the
    // prior context is anything else (including empty) then start
    // in toTitle mode.
    UChar32 c;
    int32_t start;
    for (start = offsets.start - 1; start >= offsets.contextStart; start -= U16_LENGTH(c)) {
        c = text.char32At(start);
        type=ucase_getTypeOrIgnorable(c);
        if(type>0) { // cased
            doTitle=FALSE;
            break;
        } else if(type==0) { // uncased but not ignorable
            break;
        }
        // else (type<0) case-ignorable: continue
    }

    // Convert things after a cased character toLower; things
    // after an uncased, non-case-ignorable character toTitle.  Case-ignorable
    // characters are copied directly and do not change the mode.
    UCaseContext csc;
    uprv_memset(&csc, 0, sizeof(csc));
    csc.p = &text;
    csc.start = offsets.contextStart;
    csc.limit = offsets.contextLimit;

    UnicodeString tmp;
    const UChar *s;
    int32_t textPos, delta, result;

    for(textPos=offsets.start; textPos<offsets.limit;) {
        csc.cpStart=textPos;
        c=text.char32At(textPos);
        csc.cpLimit=textPos+=U16_LENGTH(c);

        type=ucase_getTypeOrIgnorable(c);
        if(type>=0) { // not case-ignorable
            if(doTitle) {
                result=ucase_toFullTitle(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT);
            } else {
                result=ucase_toFullLower(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT);
            }
            doTitle = (UBool)(type==0); // doTitle=isUncased

            if(csc.b1 && isIncremental) {
                // fMap() tried to look beyond the context limit
                // wait for more input
                offsets.start=csc.cpStart;
                return;
            }

            if(result>=0) {
                // replace the current code point with its full case mapping result
                // see UCASE_MAX_STRING_LENGTH
                if(result<=UCASE_MAX_STRING_LENGTH) {
                    // string s[result]
                    tmp.setTo(FALSE, s, result);
                    delta=result-U16_LENGTH(c);
                } else {
                    // single code point
                    tmp.setTo(result);
                    delta=tmp.length()-U16_LENGTH(c);
                }
                text.handleReplaceBetween(csc.cpStart, textPos, tmp);
                if(delta!=0) {
                    textPos+=delta;
                    csc.limit=offsets.contextLimit+=delta;
                    offsets.limit+=delta;
                }
            }
        }
    }
    offsets.start=textPos;
}
std::string GlobalizationNDK::getDateNames(const std::string& args)
{
    ENamesType type = kNamesWide;
    ENamesItem item = kNamesMonths;

    if (!args.empty()) {
        Json::Reader reader;
        Json::Value root;
        bool parse = reader.parse(args, root);

        if (!parse) {
            slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getDateNames: invalid json data: %s",
                    args.c_str());
            return errorInJson(PARSING_ERROR, "Parameters not valid json format!");
        }

        Json::Value options = root["options"];

        std::string error;
        if (!handleNamesOptions(options, type, item, error))
            return errorInJson(PARSING_ERROR, error);
    }

    int count;
    const char* pattern;
    DateFormat::EStyle dstyle;

    // Check ICU SimpleDateFormat document for patterns for months and days.
    // http://www.icu-project.org/apiref/icu4c/classicu_1_1SimpleDateFormat.html
    if (item == kNamesMonths) {
        count = 12;
        if (type == kNamesWide) {
            dstyle = DateFormat::kLong;
            pattern = "MMMM";
        } else {
            dstyle = DateFormat::kShort;
            pattern = "MMM";
        }
    } else {
        count = 7;
        if (type == kNamesWide) {
            dstyle = DateFormat::kLong;
            pattern = "eeee";
        } else {
            dstyle = DateFormat::kShort;
            pattern = "eee";
        }
    }

    UErrorCode status = U_ZERO_ERROR;
    const Locale& loc = Locale::getDefault();
    DateFormat* df = DateFormat::createDateInstance(dstyle, loc);

    if (!df) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getDateNames: unable to create DateFormat instance!");
        return errorInJson(UNKNOWN_ERROR, "Unable to create DateFormat instance!");
    }
    std::auto_ptr<DateFormat> deleter(df);

    if (df->getDynamicClassID() != SimpleDateFormat::getStaticClassID()) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getDateNames: DateFormat instance not SimpleDateFormat!");
        return errorInJson(UNKNOWN_ERROR, "DateFormat instance not SimpleDateFormat!");
    }

    SimpleDateFormat* sdf = (SimpleDateFormat*) df;
    sdf->applyLocalizedPattern(UnicodeString(pattern, -1), status);

    Calendar* cal = Calendar::createInstance(status);
    if (!cal) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getDateNames: unable to create Calendar instance: %x.",
                status);
        return errorInJson(UNKNOWN_ERROR, "Unable to create Calendar instance!");
    }
    std::auto_ptr<Calendar> caldeleter(cal);

    UCalendarDaysOfWeek ud = cal->getFirstDayOfWeek(status);
    if (status != U_ZERO_ERROR && status != U_ERROR_WARNING_START) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getDateNames: failed to getFirstDayOfWeek: %d!",
                status);
        return errorInJson(PARSING_ERROR, "Failed to getFirstDayOfWeek!");
    }

    if (ud == UCAL_SUNDAY)
        cal->set(2014, 0, 5);
    else
        cal->set(2014, 0, 6);

    std::list<std::string> utf8Names;

    for (int i = 0; i < count; ++i) {
        UnicodeString ucs;
        sdf->format(cal->getTime(status), ucs);

        if (item == kNamesMonths)
            cal->add(UCAL_MONTH, 1, status);
        else
            cal->add(UCAL_DAY_OF_MONTH, 1, status);

        if (ucs.isEmpty())
            continue;

        std::string utf8;
        ucs.toUTF8String(utf8);
        utf8Names.push_back(utf8);
    }

    if (!utf8Names.size()) {
        slog2f(0, ID_G11N, SLOG2_ERROR, "GlobalizationNDK::getDateNames: unable to get symbols: item: %d, type: %d.",
                item, type);
        return errorInJson(UNKNOWN_ERROR, "Unable to get symbols!");
    }

    return resultInJson(utf8Names);
}