예제 #1
// Parses `text` with `format`, storing the parsed fields into `calendar`.
//
// format      date format to parse with (cast to DateFormat below)
// calendar    calendar that receives the parsed fields (cast to Calendar below)
// text        UTF-16 text to parse
// textLength  length of `text`; -1 is treated specially (see `src` below)
// parsePos    optional in/out position; when non-null it receives the index
//             reached on success or the error index on failure
// status      in/out error code; set to U_PARSE_ERROR when parsePos is
//             non-null and the parse position reports an error index
//
// NOTE(review): this excerpt has lost lines (return type, braces and at
// least one statement); restore them from the upstream source before
// compiling.
udat_parseCalendar(const    UDateFormat*    format,
                            UCalendar*      calendar,
                   const    UChar*          text,
                            int32_t         textLength,
                            int32_t         *parsePos,
                            UErrorCode      *status)
    // Do nothing when the caller's status already signals a failure.
    if(U_FAILURE(*status)) return;

    // The first constructor argument is true exactly when textLength == -1.
    // Presumably this selects the aliasing constructor with a NUL-terminated
    // buffer; confirm against the UnicodeString API.
    const UnicodeString src((UBool)(textLength == -1), text, textLength);
    ParsePosition pp;

    // NOTE(review): the statement this `if` controlled is missing from the
    // excerpt (presumably it seeded `pp` from *parsePos; TODO confirm). As
    // the text stands, the `if` would guard the parse() call below.
    if(parsePos != 0)

    ((DateFormat*)format)->parse(src, *(Calendar*)calendar, pp);

    if(parsePos != 0) {
        // An error index of -1 means the parse position recorded no error.
        if(pp.getErrorIndex() == -1)
            *parsePos = pp.getIndex();
        else {
            // Report where parsing failed and flag the failure to the caller.
            *parsePos = pp.getErrorIndex();
            *status = U_PARSE_ERROR;
예제 #2
// Parses `text` starting at `pos` using this format's own calendar and
// returns the resulting time, or 0 when nothing could be produced.
//
// text  text to parse
// pos   in/out parse position, passed through to the three-argument parse()
//
// Returns 0 when fCalendar is NULL, when the position did not advance, or
// when reading the time from the calendar fails.
//
// NOTE(review): the braces and the statements that reset `pos` and restore
// the time zone are not visible in this excerpt.
DateFormat::parse(const UnicodeString& text,
                  ParsePosition& pos) const
    UDate d = 0; // Error return UDate is 0 (the epoch)
    if (fCalendar != NULL) {
        // Remember where parsing started so progress can be detected.
        int32_t start = pos.getIndex();

        // Parse may update TimeZone used by the calendar.
        TimeZone *tzsav = (TimeZone*)fCalendar->getTimeZone().clone();

        parse(text, *fCalendar, pos);
        // Only consult the calendar when the parse consumed some input.
        if (pos.getIndex() != start) {
            UErrorCode ec = U_ZERO_ERROR;
            d = fCalendar->getTime(ec);
            if (U_FAILURE(ec)) {
                // We arrive here if fCalendar is non-lenient and there
                // is an out-of-range field.  We don't know which field
                // was illegal so we set the error index to the start.
                d = 0;

        // Restore TimeZone
        // NOTE(review): no statement using `tzsav` is visible after the clone
        // above; as shown the clone is neither handed back to the calendar
        // nor released. Confirm against the upstream source.
    return d;
예제 #3
        const MessagePattern &pattern, int32_t partIndex,
        const UnicodeString &source, ParsePosition &pos) {
    // find the best number (defined as the one with the longest parse)
    int32_t start = pos.getIndex();
    int32_t furthest = start;
    double bestNumber = uprv_getNaN();
    double tempNumber = 0.0;
    int32_t count = pattern.countParts();
    while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
        tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
        partIndex += 2;  // skip the numeric part and ignore the ARG_SELECTOR
        int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
        int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
        if (len >= 0) {
            int32_t newIndex = start + len;
            if (newIndex > furthest) {
                furthest = newIndex;
                bestNumber = tempNumber;
                if (furthest == source.length()) {
        partIndex = msgLimit + 1;
    if (furthest == start) {
    } else {
    return bestNumber;
예제 #4
파일: unum.cpp 프로젝트: 119120119/node
// File-local helper: parses `text` with `fmt` into `res`.
//
// res         receives the parsed value
// fmt         number format to parse with (cast to const NumberFormat below)
// text        UTF-16 text to parse
// textLength  length of `text`; -1 is treated specially (see `src` below)
// parsePos    optional in/out position; receives the error index on failure
//             or the index reached on success
// status      set to U_PARSE_ERROR when the parse position reports an error
//
// NOTE(review): no check of the incoming *status is visible in this excerpt,
// and the braces plus the statement under the first `if(parsePos != 0)` are
// missing.
static void
parseRes(Formattable& res,
         const   UNumberFormat*  fmt,
         const   UChar*          text,
         int32_t         textLength,
         int32_t         *parsePos /* 0 = start */,
         UErrorCode      *status)

    // The first constructor argument is true exactly when textLength == -1.
    const UnicodeString src((UBool)(textLength == -1), text, textLength);
    ParsePosition pp;

    // NOTE(review): the guarded statement is missing (presumably it seeded
    // `pp` from *parsePos; TODO confirm). As shown, the `if` would guard the
    // parse() call below.
    if(parsePos != 0)

    ((const NumberFormat*)fmt)->parse(src, res, pp);

    // Unlike the date helpers, the error status is set even when the caller
    // passed no parsePos.
    if(pp.getErrorIndex() != -1) {
        *status = U_PARSE_ERROR;
        if(parsePos != 0) {
            *parsePos = pp.getErrorIndex();
    } else if(parsePos != 0) {
        *parsePos = pp.getIndex();
예제 #5
// Parses `text` starting at `pos` and returns the resulting time, or 0 when
// nothing could be produced.
//
// This variant parses into a clone of fCalendar rather than into fCalendar
// itself, and deletes the clone before returning.
//
// text  text to parse
// pos   in/out parse position, passed through to the three-argument parse()
//
// NOTE(review): the braces and the statements that reset `pos` after a
// getTime() failure are not visible in this excerpt.
DateFormat::parse(const UnicodeString& text,
                  ParsePosition& pos) const
    UDate d = 0; // Error return UDate is 0 (the epoch)
    if (fCalendar != NULL) {
        Calendar* calClone = fCalendar->clone();
        // A failed clone falls through to the error return value.
        if (calClone != NULL) {
            int32_t start = pos.getIndex();
            parse(text, *calClone, pos);
            // Only consult the clone when the parse consumed some input.
            if (pos.getIndex() != start) {
                UErrorCode ec = U_ZERO_ERROR;
                d = calClone->getTime(ec);
                if (U_FAILURE(ec)) {
                    // We arrive here if fCalendar => calClone is non-lenient and
                    // there is an out-of-range field.  We don't know which field
                    // was illegal so we set the error index to the start.
                    d = 0;
            delete calClone;
    return d;
예제 #6
// C API wrapper: applies `pattern` to `set` and returns the index reached by
// the parse position.
//
// set            set to modify; NULL is rejected with U_ILLEGAL_ARGUMENT_ERROR
// pattern        pattern text
// patternLength  length of `pattern`, forwarded to the UnicodeString constructor
// options        forwarded unchanged to UnicodeSet::applyPattern
// status         in/out error code; must not be NULL
//
// Returns 0 when `status` is NULL, already holds a failure, or `set` is NULL;
// otherwise pos.getIndex() after applyPattern.
//
// NOTE(review): the closing braces of the two guard blocks are missing from
// this excerpt.
U_CAPI int32_t U_EXPORT2 
uset_applyPattern(USet *set,
                  const UChar *pattern, int32_t patternLength,
                  uint32_t options,
                  UErrorCode *status){

    // The status pointer must be checked before use because it is
    // dereferenced below.
    if(status == NULL || U_FAILURE(*status)){
        return 0;

    // Only the set parameter is checked here.
    // A NULL or null-terminated pattern is left
    // to the UnicodeString constructor.
    if(set == NULL){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;

    UnicodeString pat(pattern, patternLength);

    ParsePosition pos;
    ((UnicodeSet*) set)->applyPattern(pat, pos, options, NULL, *status);
    return pos.getIndex();
예제 #7
// Parses `text` as a number with ICU and stores the result in _value.
//
// Returns true for empty input (with _value set to 0) and when the whole
// converted string parses without error; returns false otherwise.
//
// NOTE(review): the return type and braces are missing from this excerpt.
NumberValcon::parse(const QString& text)
    // Empty input is accepted and treated as zero.
    if (text.isEmpty()) {
	_value = 0;
	return true;

    UnicodeString utext = convertToICU(text);

    // Parse the number using ICU
    UErrorCode status = U_ZERO_ERROR;
    // NOTE(review): no release of `fmt` is visible anywhere in this excerpt,
    // including on the early `return true` path. If createInstance hands
    // ownership to the caller this leaks; confirm against the ICU docs.
    NumberFormat* fmt = NumberFormat::createInstance(status);
    if (U_SUCCESS(status)) {
	Formattable value;
	ParsePosition pos;
	fmt->parse(utext, value, pos);
	// Accept only a parse with no error index that consumed all of utext.
	if (pos.getErrorIndex() == -1 && pos.getIndex() == utext.length()) {
	    // NOTE(review): two back-to-back assignments with different argument
	    // forms (pointer vs. reference). Presumably these were alternatives
	    // under a version conditional whose preprocessor lines were lost;
	    // TODO confirm.
	    _value = value.getDouble(&status);
	    _value = value.getDouble(status);
	    return true;

    return false;
예제 #8
// Parses `text` with `format` and returns the resulting time.
//
// format      date format to parse with (cast to DateFormat below)
// text        UTF-16 text to parse
// textLength  length of `text`; -1 is treated specially (see `src` below)
// parsePos    optional in/out position; a local stand-in is used when NULL
// status      in/out error code; set to U_PARSE_ERROR when the parse
//             position reports an error index
//
// Returns (UDate)0 immediately when *status already holds a failure,
// otherwise whatever DateFormat::parse returned.
//
// NOTE(review): the return type, braces and probably the statement that
// seeds `pp` from *parsePos are missing from this excerpt.
udat_parse(    const    UDateFormat*        format,
        const    UChar*          text,
        int32_t         textLength,
        int32_t         *parsePos,
        UErrorCode      *status)
    if(U_FAILURE(*status)) return (UDate)0;

    // The first constructor argument is true exactly when textLength == -1.
    const UnicodeString src((UBool)(textLength == -1), text, textLength);
    ParsePosition pp;
    int32_t stackParsePos = 0;
    UDate res;

    // Point parsePos at a local so the code below can write through it
    // without further null checks.
    if(parsePos == NULL) {
        parsePos = &stackParsePos;


    res = ((DateFormat*)format)->parse(src, pp);

    // An error index of -1 means the parse position recorded no error.
    if(pp.getErrorIndex() == -1)
        *parsePos = pp.getIndex();
    else {
        *parsePos = pp.getErrorIndex();
        *status = U_PARSE_ERROR;

    return res;
// File-local helper: parses `text` with `fmt` into `res`, as either a plain
// number or a currency amount.
//
// res            receives the parsed value
// fmt            number format to parse with (cast to const NumberFormat below)
// text           UTF-16 text to parse
// textLength     length of `text`; -1 means the length is taken from u_strlen
// parsePos       optional in/out position; receives the error index on
//                failure or the index reached on success
// parseCurrency  selects parseCurrency() instead of parse()
// status         set to U_PARSE_ERROR when the parse position reports an error
//
// NOTE(review): braces and the statement under the first `if(parsePos != 0)`
// are missing from this excerpt.
static void
parseRes(Formattable& res,
         const   UNumberFormat*  fmt,
         const   UChar*          text,
         int32_t         textLength,
         int32_t         *parsePos /* 0 = start */,
         UBool parseCurrency,
         UErrorCode      *status)
    // Resolve the length up front so the string can be built with explicit
    // length and capacity.
    int32_t len = (textLength == -1 ? u_strlen(text) : textLength);
    // NOTE(review): the cast drops const from `text`; presumably this picks a
    // buffer-wrapping constructor (confirm against the UnicodeString API).
    const UnicodeString src((UChar*)text, len, len);
    ParsePosition pp;
    // NOTE(review): the guarded statement is missing (presumably it seeded
    // `pp` from *parsePos; TODO confirm). As shown, this `if` would guard the
    // whole if/else below.
    if(parsePos != 0)
    if (parseCurrency) {
        ((const NumberFormat*)fmt)->parseCurrency(src, res, pp);
    } else {
        ((const NumberFormat*)fmt)->parse(src, res, pp);
    // The error status is set even when the caller passed no parsePos.
    if(pp.getErrorIndex() != -1) {
        *status = U_PARSE_ERROR;
        if(parsePos != 0) {
            *parsePos = pp.getErrorIndex();
    } else if(parsePos != 0) {
        *parsePos = pp.getIndex();
예제 #10
// parseObject entry point for PluralFormat.
//
// The `source` and `result` parameter names are commented out, so only `pos`
// can be referenced by the body.
//
// NOTE(review): the return type and the body are not visible in this
// excerpt; only the original remark below survives.
PluralFormat::parseObject(const UnicodeString& /*source*/,
                        Formattable& /*result*/,
                        ParsePosition& pos) const
    // Parsing not supported.
예제 #11
// parseObject entry point for SelectFormat.
//
// The `source` and `result` parameter names are commented out, so only `pos`
// can be referenced by the body.
//
// NOTE(review): the return type and the body are not visible in this
// excerpt; only the original remark below survives.
SelectFormat::parseObject(const UnicodeString& /*source*/,
                        Formattable& /*result*/,
                        ParsePosition& pos) const
    // TODO: not yet supported in icu4j and icu4c
예제 #12
// Test: formats minute amounts with a TimeUnitFormat for en_US and compares
// the output with a table of expected strings, then checks that parsing
// "1 minutes" consumes the whole string.
//
// NOTE(review): this excerpt has lost lines: the closing of the `expected`
// initializer, every brace-only line, and the early exits after the
// dataerrln calls. Restore them from the upstream source before compiling.
void TimeUnitTest::test10219Plurals() {
    Locale usLocale("en_US");
    double values[2] = {1.588, 1.011};
    // One row per entry in `values`, three expected strings per row.
    // NOTE(review): presumably the inner index selects a number-format
    // setting (e.g. fraction digits) whose call is missing here; TODO confirm.
    UnicodeString expected[2][3] = {
        {"1 minute", "1.5 minutes", "1.58 minutes"},
        {"1 minute", "1.0 minutes", "1.01 minutes"}
    UErrorCode status = U_ZERO_ERROR;
    TimeUnitFormat tuf(usLocale, status);
    if (U_FAILURE(status)) {
        dataerrln("generating TimeUnitFormat Object failed: %s", u_errorName(status));
    LocalPointer<DecimalFormat> nf((DecimalFormat *) NumberFormat::createInstance(usLocale, status));
    if (U_FAILURE(status)) {
        dataerrln("generating NumberFormat Object failed: %s", u_errorName(status));
    for (int32_t j = 0; j < UPRV_LENGTHOF(values); ++j) {
        for (int32_t i = 0; i < UPRV_LENGTHOF(expected[j]); ++i) {
            tuf.setNumberFormat(*nf, status);
            if (U_FAILURE(status)) {
                dataerrln("setting NumberFormat failed: %s", u_errorName(status));
            UnicodeString actual;
            Formattable fmt;
            LocalPointer<TimeUnitAmount> tamt(
                new TimeUnitAmount(values[j], TimeUnit::UTIMEUNIT_MINUTE, status), status);
            if (U_FAILURE(status)) {
                dataerrln("generating TimeUnitAmount Object failed: %s", u_errorName(status));
            // NOTE(review): no statement handing `tamt` to `fmt` is visible,
            // so as shown `fmt` is still default-constructed when formatted.
            tuf.format(fmt, actual, status);
            if (U_FAILURE(status)) {
                dataerrln("Actual formatting failed: %s", u_errorName(status));
            if (expected[j][i] != actual) {
                errln("Expected " + expected[j][i] + ", got " + actual);

    // test parsing
    Formattable result;
    ParsePosition pos;
    UnicodeString formattedString = "1 minutes";
    tuf.parseObject(formattedString, result, pos);
    // The parse is expected to reach the end of the input.
    if (formattedString.length() != pos.getIndex()) {
        errln("Expect parsing to go all the way to the end of the string.");
예제 #13
/** @bug 4104136 */
// Regression test: parses three strings with a SimpleDateFormat and checks
// both the parse position reached and the resulting date for each one.
//
// The loop at the end compares pos.getIndex() with the expected position
// and accepts a result of 0 when the expected date is -1.
//
// NOTE(review): this excerpt has lost lines: the braces, the entries of the
// `ppos` array, all but one entry of `dates`, and the terminator of the
// block comment that opens before the loop. No call applying `pattern` to
// `sdf` is visible either. Restore them from the upstream source before
// compiling.
void DateFormatRegressionTest::Test4104136(void) 
    UErrorCode status = U_ZERO_ERROR;
    SimpleDateFormat *sdf = new SimpleDateFormat(status); 
    // Report a data error and release the formatter when construction failed.
    if(U_FAILURE(status)) {
      dataerrln("Couldn't create SimpleDateFormat, error %s", u_errorName(status));
      delete sdf;
    if(failure(status, "new SimpleDateFormat")) return;
    UnicodeString pattern = "'time' hh:mm"; 
    logln("pattern: \"" + pattern + "\""); 

    // Inputs: one that matches the pattern and two with a stray 'x'.
    UnicodeString strings [] = {
        (UnicodeString)"time 10:30",
        (UnicodeString) "time 10:x",
        (UnicodeString) "time 10x"

    // Expected parse positions, index-aligned with `strings`.
    ParsePosition ppos [] = {

    // Expected dates, index-aligned with `strings`.
    UDate dates [] = {
        date(70, UCAL_JANUARY, 1, 10, 30),

    // NOTE(review): the block comment opened on the next line has no visible
    // terminator in this excerpt.
    /*Object[] DATA = {
        "time 10:30", new ParsePosition(10), new Date(70, Calendar.JANUARY, 1, 10, 30),
        "time 10:x", new ParsePosition(0), null,
        "time 10x", new ParsePosition(0), null,
    for(int i = 0; i < 3; i++) {
        UnicodeString text = strings[i];
        ParsePosition finish = ppos[i];
        UDate exp = dates[i];
        ParsePosition pos(0);
        UDate d = sdf->parse(text, pos);
        logln(" text: \"" + text + "\""); 
        logln(" index: %d", pos.getIndex()); 
        logln((UnicodeString) " result: " + d);
        if(pos.getIndex() != finish.getIndex())
            errln("Fail: Expected pos " + finish.getIndex());
        if (! ((d == 0 && exp == -1) || (d == exp)))
            errln((UnicodeString) "Fail: Expected result " + exp);

    delete sdf;
예제 #14
/**
 * If this is a >>> substitution, match only against ruleToUse.
 * Otherwise, use the superclass function.
 * @param text The string to parse
 * @param parsePosition Ignored on entry, updated on exit to point to
 * the first unmatched character.
 * @param baseValue The partial parse result prior to calling this
 * routine.
 */
// Parses `text` for this substitution.
//
// When ruleToUse is NULL the call is forwarded unchanged to
// NFSubstitution::doParse. Otherwise ruleToUse->doParse is used and, if the
// parse position moved off 0, the parsed double is combined with baseValue
// through composeRuleValue.
//
// NOTE(review): the return type, braces and the statement that stores
// `tempResult` back into `result` are not visible in this excerpt.
ModulusSubstitution::doParse(const UnicodeString& text,
                             ParsePosition& parsePosition,
                             double baseValue,
                             double upperBound,
                             UBool lenientParse,
                             Formattable& result) const
    // if this isn't a >>> substitution, we can just use the
    // inherited parse() routine to do the parsing
    if (ruleToUse == NULL) {
        return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, result);

        // but if it IS a >>> substitution, we have to do it here: we
        // use the specific rule's doParse() method, and then we have to
        // do some of the other work of NFRuleSet.parse()
    } else {
        // The third argument is the literal FALSE rather than a value
        // derived from this call's parameters.
        ruleToUse->doParse(text, parsePosition, FALSE, upperBound, result);

        // A non-zero index means the rule consumed some text.
        if (parsePosition.getIndex() != 0) {
            UErrorCode status = U_ZERO_ERROR;
            double tempResult = result.getDouble(status);
            tempResult = composeRuleValue(tempResult, baseValue);

        return TRUE;
예제 #15
// Parses `text` at the position in `status` by trying every choice string
// and keeping the limit of the one that matches the most text.
//
// text    text to parse
// result  receives the parsed value (the assignment is not visible here)
// status  in/out parse position; despite the name it is a ParsePosition,
//         not an error code
//
// NOTE(review): the return type, braces, the early exit from the loop and
// everything after the final `if` are missing from this excerpt.
ChoiceFormat::parse(const UnicodeString& text, 
                    Formattable& result,
                    ParsePosition& status) const
    // find the best number (defined as the one with the longest parse)
    int32_t start = status.getIndex();
    int32_t furthest = start;
    // NaN is the starting value until some choice matches.
    double bestNumber = uprv_getNaN();
    double tempNumber = 0.0;
    for (int i = 0; i < fCount; ++i) {
        int32_t len = fChoiceFormats[i].length();
        // Compare the choice string with the text at the start position.
        if (text.compare(start, len, fChoiceFormats[i]) == 0) {
            status.setIndex(start + len);
            tempNumber = fChoiceLimits[i];
            // Keep this candidate only when it reaches further than the best so far.
            if (status.getIndex() > furthest) {
                furthest = status.getIndex();
                bestNumber = tempNumber;
                if (furthest == text.length()) 
    if (status.getIndex() == start) {
예제 #16
// Parses `text` from parsePosition by trying every public, parseable rule
// set and keeping the result of the one that consumes the most text.
//
// text           text to parse
// result         receives the best parse result
// parsePosition  in/out position; advanced by the length of the best parse,
//                or given an error index when nothing was consumed
//
// NOTE(review): the return type, braces, the bodies of the two guard `if`s,
// the early exit from the loop and the tail of the function are missing
// from this excerpt.
RuleBasedNumberFormat::parse(const UnicodeString& text,
                             Formattable& result,
                             ParsePosition& parsePosition) const
    //TODO: We need a real fix.  See #6895 / #6896
    if (noParse) {
        // skip parsing

    if (!ruleSets) {

    // Work on the text from the current position onward, so the rule sets
    // see positions relative to 0.
    UnicodeString workingText(text, parsePosition.getIndex());
    ParsePosition workingPos(0);

    // Best position and result found so far.
    ParsePosition high_pp(0);
    Formattable high_result;

    // The loop stops at the first NULL entry in ruleSets.
    for (NFRuleSet** p = ruleSets; *p; ++p) {
        NFRuleSet *rp = *p;
        if (rp->isPublic() && rp->isParseable()) {
            ParsePosition working_pp(0);
            Formattable working_result;

            rp->parse(workingText, working_pp, kMaxDouble, working_result);
            if (working_pp.getIndex() > high_pp.getIndex()) {
                high_pp = working_pp;
                high_result = working_result;

                if (high_pp.getIndex() == workingText.length()) {

    // Translate the relative index back into the caller's coordinates.
    int32_t startIndex = parsePosition.getIndex();
    parsePosition.setIndex(startIndex + high_pp.getIndex());
    if (high_pp.getIndex() > 0) {
    } else {
        // Non-positive error indices are treated as 0 before offsetting.
        int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
        parsePosition.setErrorIndex(startIndex + errorIndex);
    result = high_result;
    // Detect doubles that survive a round trip through int32_t.
    // NOTE(review): the statement guarded by the inner `if` is missing.
    if (result.getType() == Formattable::kDouble) {
        int32_t r = (int32_t)result.getDouble();
        if ((double)r == result.getDouble()) {
예제 #17
// RBBISymbolTable::parseReference   This function from the abstract symbol table interface
//                                   looks for a $variable name in the source text.
//                                   It does not look it up, only scans for it.
//                                   It is used by the UnicodeSet parser.
//                                   This implementation is lifted pretty much verbatim
//                                   from the rules based transliterator implementation.
//                                   I didn't see an obvious way of sharing it.
//
//                                   Scanning starts at pos.getIndex() and goes no further
//                                   than `limit`. The first character is tested with
//                                   u_isIDStart and every character with u_isIDPart.
//                                   Returns the text between the start and the stopping
//                                   index, or an empty string when no character qualified.
//
//                                   NOTE(review): braces, the loop's break and increment,
//                                   and any update of `pos` are missing from this excerpt.
UnicodeString   RBBISymbolTable::parseReference(const UnicodeString& text,
                                                ParsePosition& pos, int32_t limit) const
    int32_t start = pos.getIndex();
    int32_t i = start;
    UnicodeString result;
    while (i < limit) {
        UChar c = text.charAt(i);
        // Stop at the first character that cannot be part of an identifier.
        if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) {
    if (i == start) { // No valid name chars
        return result; // Indicate failure with empty string
    text.extractBetween(start, i, result);
    return result;
예제 #18
파일: unum.cpp 프로젝트: 119120119/node
// Parses `text` as a currency amount and returns its numeric value, writing
// the ISO currency code into `currency`.
//
// fmt         number format to parse with (cast to const NumberFormat below)
// text        UTF-16 text to parse
// textLength  length of `text`; -1 is treated specially (see `src` below)
// parsePos    optional in/out position; receives the error index on failure
//             or the index reached on success
// currency    output buffer; its first unit is zeroed before the status
//             check and it is dereferenced unconditionally, so it must not
//             be NULL
// status      in/out error code; set to U_PARSE_ERROR unless the parse
//             succeeds and consumes at least one unit
//
// Returns 0.0 unless a currency amount was parsed.
//
// NOTE(review): the return type, braces and the statement under the first
// `if (parsePos != NULL)` are missing from this excerpt.
unum_parseDoubleCurrency(const UNumberFormat* fmt,
                         const UChar* text,
                         int32_t textLength,
                         int32_t* parsePos, /* 0 = start */
                         UChar* currency,
                         UErrorCode* status) {
    double doubleVal = 0.0;
    currency[0] = 0;
    if (U_FAILURE(*status)) {
        return doubleVal;
    // The first constructor argument is true exactly when textLength == -1.
    const UnicodeString src((UBool)(textLength == -1), text, textLength);
    ParsePosition pp;
    if (parsePos != NULL) {
    *status = U_PARSE_ERROR; // assume failure, reset if succeed
    // LocalPointer takes charge of the CurrencyAmount returned by parseCurrency.
    LocalPointer<CurrencyAmount> currAmt(((const NumberFormat*)fmt)->parseCurrency(src, pp));
    if (pp.getErrorIndex() != -1) {
        if (parsePos != NULL) {
            *parsePos = pp.getErrorIndex();
    } else {
        if (parsePos != NULL) {
            *parsePos = pp.getIndex();
        // Success requires that some input was actually consumed.
        if (pp.getIndex() > 0) {
            *status = U_ZERO_ERROR;
            // NOTE(review): u_strcpy is given no size; the caller's buffer must
            // be large enough for the code plus terminator.
            u_strcpy(currency, currAmt->getISOCurrency());
            doubleVal = currAmt->getNumber().getDouble(*status);
    return doubleVal;
예제 #19
// Parses `text` starting at `pos` using this format's own calendar and
// returns the resulting time.
//
// This variant returns from inside the success branch and falls through to
// a single error return of 0.
//
// text  text to parse
// pos   in/out parse position, passed through to the three-argument parse()
//
// NOTE(review): the return type, braces and the statements that reset `pos`
// on failure are not visible in this excerpt.
DateFormat::parse(const UnicodeString& text,
                  ParsePosition& pos) const
    if (fCalendar != NULL) {
        // Remember where parsing started so progress can be detected.
        int32_t start = pos.getIndex();
        parse(text, *fCalendar, pos);
        if (pos.getIndex() != start) {
            UErrorCode ec = U_ZERO_ERROR;
            UDate d = fCalendar->getTime(ec);
            if (U_SUCCESS(ec)) {
                return d; // Successful function exit
            // We arrive here if fCalendar is non-lenient and there
            // is an out-of-range field.  We don't know which field
            // was illegal so we set the error index to the start.
    return 0; // Error return UDate is 0 (the epoch)
예제 #20
// Parses `text` into `cal`: first through fDateFormat when it is set, then
// by a linear search of the relative-day strings in fDates.
//
// text  text to parse
// cal   calendar to update; a matching relative string adds its offset in
//       days via cal.add(UCAL_DATE, ...)
// pos   in/out parse position
//
// NOTE(review): this excerpt has lost lines: the call that parses with
// fDateFormat into `aPos`, the middle of the string-comparison condition,
// the statements that reset `cal` before the add, and the position updates
// in the success and failure branches.
void RelativeDateFormat::parse( const UnicodeString& text,
                                Calendar& cal,
                                ParsePosition& pos) const {

    // Can the fDateFormat parse it?
    if(fDateFormat != NULL) {
        // Work on a copy so `pos` changes only when the sub-parse succeeds.
        ParsePosition aPos(pos);
        if((aPos.getIndex() != pos.getIndex()) &&
                (aPos.getErrorIndex()==-1)) {
            pos=aPos; // copy the sub parse
            return; // parsed subfmt OK

    // Linear search the relative strings
    for(int n=0; n<fDatesLen; n++) {
        // Entries without a string are skipped.
        if(fDates[n].string != NULL &&
                                 fDates[n].string))) {
            UErrorCode status = U_ZERO_ERROR;

            // Set the calendar to now+offset
            cal.add(UCAL_DATE,fDates[n].offset, status);

            if(U_FAILURE(status)) {
                // failure in setting calendar fields
            } else {

    // parse failed
예제 #21
// Runs one parse test case: parses `inputText` with a "####" DecimalFormat
// built from US symbols, then checks the end position, the Formattable type
// and the decimal-number string of the result.
//
// lineNum          line number used in the failure messages
// inputText        text to parse; the parse is expected to consume all of it
// expectedType     first character selects the expected type:
//                  'd' double, 'i' long, 'l' int64
// expectedDecimal  expected decimal-number string
// status           in/out error code
//
// NOTE(review): this excerpt has lost lines: braces, the early returns after
// failures and the `default:` label of the switch.
void DecimalFormatTest::execParseTest(int32_t lineNum,
                                     const UnicodeString &inputText,
                                     const UnicodeString &expectedType,
                                     const UnicodeString &expectedDecimal,
                                     UErrorCode &status) {
    if (U_FAILURE(status)) {

    DecimalFormatSymbols symbols(Locale::getUS(), status);
    UnicodeString pattern = UNICODE_STRING_SIMPLE("####");
    DecimalFormat format(pattern, symbols, status);
    Formattable   result;
    if (U_FAILURE(status)) {
        errln("file dcfmtest.txt, line %d: %s error creating the formatter.",
            lineNum, u_errorName(status));

    ParsePosition pos;
    int32_t expectedParseEndPosition = inputText.length();

    format.parse(inputText, result, pos);

    // NOTE(review): the message below has three %d conversions but only two
    // arguments; `lineNum` appears to be missing as the first one. The word
    // "afeter" in the message is also a typo. Both are runtime text and are
    // left untouched here.
    if (expectedParseEndPosition != pos.getIndex()) {
        errln("file dcfmtest.txt, line %d: Expected parse position afeter parsing: %d.  "
              "Actual parse position: %d", expectedParseEndPosition, pos.getIndex());

    // Extract the first character of expectedType into a C string.
    char   expectedTypeC[2];
    expectedType.extract(0, 1, expectedTypeC, 2, US_INV);
    // kDate is the initial value; each case below replaces it.
    Formattable::Type expectType = Formattable::kDate;
    switch (expectedTypeC[0]) {
      case 'd': expectType = Formattable::kDouble; break;
      case 'i': expectType = Formattable::kLong;   break;
      case 'l': expectType = Formattable::kInt64;  break;
          // NOTE(review): presumably this errln sat under a `default:` label
          // that was lost; the message text also misspells the file name and
          // "unrecognized" (runtime text, left untouched).
          errln("file dcfmtest.tx, line %d: unrecongized expected type \"%s\"",
              lineNum, InvariantStringPiece(expectedType).data());
    if (result.getType() != expectType) {
        errln("file dcfmtest.txt, line %d: expectedParseType(%s) != actual parseType(%s)",
             lineNum, formattableType(expectType), formattableType(result.getType()));

    StringPiece decimalResult = result.getDecimalNumber(status);
    if (U_FAILURE(status)) {
        errln("File %s, line %d: error %s.  Line in file dcfmtest.txt:  %d:",
            __FILE__, __LINE__, u_errorName(status), lineNum);

    InvariantStringPiece expectedResults(expectedDecimal);
    if (decimalResult != expectedResults) {
        errln("file dcfmtest.txt, line %d: expected \"%s\", got \"%s\"",
            lineNum, expectedResults.data(), decimalResult.data());
예제 #22
//  scanSet    Construct a UnicodeSet from the text at the current scan
//             position.  Advance the scan position to the first character
//             after the set.
//             A new RBBI setref node referring to the set is pushed onto the node
//             stack.
//             The scan position is normally under the control of the state machine
//             that controls rule parsing.  UnicodeSets, however, are parsed by
//             the UnicodeSet constructor, not by the RBBI rule parser.
//
//             NOTE(review): this excerpt has lost lines: braces, early returns,
//             the statement that seeds `pos`, the remaining constructor
//             arguments, the error() calls and the body of the advance loop.
void RBBIRuleScanner::scanSet() {
    UnicodeSet    *uset;
    ParsePosition  pos;
    int            startPos;
    int            i;

    if (U_FAILURE(*fRB->fStatus)) {

    startPos = fScanIndex;
    // A local status keeps UnicodeSet construction errors separate from the
    // rule builder's own status.
    UErrorCode localStatus = U_ZERO_ERROR;
    uset = new UnicodeSet(fRB->fRules, pos, USET_IGNORE_SPACE,
    if (U_FAILURE(localStatus)) {
        //  TODO:  Get more accurate position of the error from UnicodeSet's return info.
        //         UnicodeSet appears to not be reporting correctly at this time.
        RBBIDebugPrintf("UnicodeSet parse postion.ErrorIndex = %d\n", pos.getIndex());
        delete uset;

    // Verify that the set contains at least one code point.
    if (uset->isEmpty()) {
        // This set is empty.
        //  Make it an error, because it almost certainly is not what the user wanted.
        //  Also, avoids having to think about corner cases in the tree manipulation code
        //   that occurs later on.
        delete uset;

    // Advance the RBBI parse position over the UnicodeSet pattern.
    //   Don't just set fScanIndex because the line/char positions maintained
    //   for error reporting would be thrown off.
    i = pos.getIndex();
    for (;;) {
        if (fNextIndex >= i) {

    if (U_SUCCESS(*fRB->fStatus)) {
        RBBINode         *n;

        n = pushNewNode(RBBINode::setRef);
        n->fFirstPos = startPos;
        n->fLastPos  = fNextIndex;
        // Record the source text of the set on the node.
        fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
        //  findSetFor() serves several purposes here:
        //     - Adopts storage for the UnicodeSet, will be responsible for deleting.
        //     - Maintains collection of all sets in use, needed later for establishing
        //          character categories for run time engine.
        //     - Eliminates multiple instances of the same set.
        //     - Creates a new uset node if necessary (if this isn't a duplicate.)
        findSetFor(n->fText, n, uset);

예제 #23
// Parses `text` against this rule set and keeps the result of whichever
// rule consumes the most text.
//
// text        text to parse
// pos         in/out position; on exit set to the furthest position reached
// upperBound  forwarded to each rule's doParse; regular rules whose base
//             value is at or above it are treated specially when this is
//             not a fraction rule set (the guarded statement is not visible)
// result      receives the value of the best-matching rule
//
// Returns 0 when `text` is empty and 1 otherwise.
//
// NOTE(review): this excerpt has lost lines: the return type, braces, the
// `continue` in the regular-rule loop and, presumably, the debug-only
// preprocessor guards around the fprintf/dumpUS blocks (TODO confirm).
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
    // try matching each rule in the rule set against the text being
    // parsed.  Whichever one matches the most characters is the one
    // that determines the value we return.


    // dump out if there's no text to parse
    if (text.length() == 0) {
        return 0;

    // highWaterMark tracks the furthest position any rule has reached;
    // workingPos is reset to `pos` after every attempt.
    ParsePosition highWaterMark;
    ParsePosition workingPos = pos;

    // NOTE(review): diagnostic output to stderr. The last fprintf in this
    // group has one %d conversion but two arguments (`this` and the
    // comparison), and `this` is printed with %x elsewhere.
    fprintf(stderr, "<nfrs> %x '", this);
    dumpUS(stderr, name);
    fprintf(stderr, "' text '");
    dumpUS(stderr, text);
    fprintf(stderr, "'\n");
    fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);

    // start by trying the negative number rule (if there is one)
    if (negativeNumberRule) {
        Formattable tempResult;
        fprintf(stderr, "  <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound);
        UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
        fprintf(stderr, "  <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
        if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
            result = tempResult;
            highWaterMark = workingPos;
        workingPos = pos;
    fprintf(stderr, "<nfrs> continue fractional with text '");
    dumpUS(stderr, text);
    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
    // then try each of the fraction rules
        for (int i = 0; i < 3; i++) {
            if (fractionRules[i]) {
                Formattable tempResult;
                UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
                if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
                    result = tempResult;
                    highWaterMark = workingPos;
                workingPos = pos;
    fprintf(stderr, "<nfrs> continue other with text '");
    dumpUS(stderr, text);
    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());

    // finally, go through the regular rules one at a time.  We start
    // at the end of the list because we want to try matching the most
    // significant rule first (this helps ensure that we parse
    // "five thousand three hundred six" as
    // "(five thousand) (three hundred) (six)" rather than
    // "((five thousand three) hundred) (six)").  Skip rules whose
    // base values are higher than the upper bound (again, this helps
    // limit ambiguity by making sure the rules that match a rule's
    // are less significant than the rule containing the substitutions)/
        int64_t ub = util64_fromDouble(upperBound);
            char ubstr[64];
            util64_toa(ub, ubstr, 64);
            char ubstrhex[64];
            util64_toa(ub, ubstrhex, 64, 16);
            fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
        // Walk the rules from last to first and stop once the whole text
        // has been matched.
        for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
            if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
            Formattable tempResult;
            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
            if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
                result = tempResult;
                highWaterMark = workingPos;
            workingPos = pos;
    fprintf(stderr, "<nfrs> exit\n");
    // finally, update the parse position we were passed to point to the
    // first character we didn't use, and return the result that
    // corresponds to that string of characters
    pos = highWaterMark;

    return 1;
예제 #24
// Parses the fractional part of a number.
//
// When byDigits is false the call is forwarded to NFSubstitution::doParse
// with an upper bound of 0. Otherwise the text is parsed one digit at a
// time with the owning rule set (upper bound 10), the digits are collected
// in a DigitList, and the resulting value is combined with baseValue through
// composeRuleValue.
//
// text           text to parse
// parsePosition  in/out position; advanced past every digit consumed and
//                past the gSpace characters that follow each one
// baseValue      value the parsed fraction is combined with
// lenientParse   when set, a digit the rule set could not parse is retried
//                with a default NumberFormat
// resVal         receives the result (the assignment is not visible here)
//
// NOTE(review): the return type, braces and the statement that stores
// `result` into `resVal` are missing from this excerpt.
FractionalPartSubstitution::doParse(const UnicodeString& text,
                ParsePosition& parsePosition,
                double baseValue,
                double /*upperBound*/,
                UBool lenientParse,
                Formattable& resVal) const
    // if we're not in byDigits mode, we can just use the inherited
    // doParse()
    if (!byDigits) {
        return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, resVal);

        // if we ARE in byDigits mode, parse the text one digit at a time
        // using this substitution's owning rule set (we do this by setting
        // upperBound to 10 when calling doParse() ) until we reach
        // nonmatching text
    } else {
        UnicodeString workText(text);
        // Starts at 1 so the loop condition below holds on the first pass.
        // NOTE(review): no reset of workPos at the top of the loop is visible.
        ParsePosition workPos(1);
        double result = 0;
        int32_t digit;
//          double p10 = 0.1;

        DigitList dl;
        // Created on first use in the lenient branch and released after the loop.
        NumberFormat* fmt = NULL;
        while (workText.length() > 0 && workPos.getIndex() != 0) {
            Formattable temp;
            getRuleSet()->parse(workText, workPos, 10, temp);
            UErrorCode status = U_ZERO_ERROR;
            digit = temp.getLong(status);
//            digit = temp.getType() == Formattable::kLong ?
//               temp.getLong() :
//            (int32_t)temp.getDouble();

            // Lenient fallback: the rule set consumed nothing, so retry with
            // a NumberFormat.
            if (lenientParse && workPos.getIndex() == 0) {
                if (!fmt) {
                    status = U_ZERO_ERROR;
                    fmt = NumberFormat::createInstance(status);
                    if (U_FAILURE(status)) {
                        delete fmt;
                        fmt = NULL;
                if (fmt) {
                    fmt->parse(workText, temp, workPos);
                    digit = temp.getLong(status);

            // A digit was consumed: record it and drop it from the working text.
            if (workPos.getIndex() != 0) {
                dl.append((char)('0' + digit));
//                  result += digit * p10;
//                  p10 /= 10;
                parsePosition.setIndex(parsePosition.getIndex() + workPos.getIndex());
                workText.removeBetween(0, workPos.getIndex());
                // Skip gSpace characters, keeping the caller's position in step.
                while (workText.length() > 0 && workText.charAt(0) == gSpace) {
                    workText.removeBetween(0, 1);
                    parsePosition.setIndex(parsePosition.getIndex() + 1);
        delete fmt;

        // No digits collected yields 0.
        result = dl.fCount == 0 ? 0 : dl.getDouble();
        result = composeRuleValue(result, baseValue);
        return TRUE;
예제 #25
// Parses `text` starting at pos.getIndex() into `cal`, recognising the
// relative-day strings stored in fDates[] in addition to ordinary patterns.
// Three cases are distinguished below: no date pattern, no time pattern (or
// no way to combine the two), and a combined date+time pattern.
//
// NOTE(review): this excerpt appears to have lost lines (closing braces and
// the statements inside several branches), so some branches below contain
// only their comments. Confirm against the full source before relying on it.
void RelativeDateFormat::parse( const UnicodeString& text,
                    Calendar& cal,
                    ParsePosition& pos) const {

    // Remember where parsing started; all matching below is relative to it.
    int32_t startIndex = pos.getIndex();
    if (fDatePattern.isEmpty()) {
        // no date pattern, try parsing as time
        // NOTE(review): the actual parse call for this case is not visible here.
    } else if (fTimePattern.isEmpty() || fCombinedFormat == NULL) {
        // no time pattern or way to combine, try parsing as date
        // first check whether text matches a relativeDayString
        UBool matchedRelative = FALSE;
        // Stop at the first entry whose string matches at startIndex.
        for (int n=0; n < fDatesLen && !matchedRelative; n++) {
            if (fDates[n].string != NULL &&
                    text.compare(startIndex, fDates[n].len, fDates[n].string) == 0) {
                // it matched, handle the relative day string
                UErrorCode status = U_ZERO_ERROR;
                matchedRelative = TRUE;

                // Set the calendar to now+offset
                // NOTE(review): only the add() is visible; whatever sets `cal`
                // to "now" beforehand is not shown in this excerpt.
                cal.add(UCAL_DATE,fDates[n].offset, status);

                if(U_FAILURE(status)) {
                    // failure in setting calendar field, set offset to beginning of rel day string
                } else {
                    // Success: consume the matched relative-day string.
                    pos.setIndex(startIndex + fDates[n].len);
        if (!matchedRelative) {
            // just parse as normal date
    } else {
        // Here we replace any relativeDayString in text with the equivalent date
        // formatted per fDatePattern, then parse text normally using the combined pattern.
        UnicodeString modifiedText(text);
        FieldPosition fPos;
        // Location and lengths of the (last) replacement, used further down to
        // map positions in modifiedText back to positions in the original text.
        int32_t dateStart = 0, origDateLen = 0, modDateLen = 0;
        UErrorCode status = U_ZERO_ERROR;
        for (int n=0; n < fDatesLen; n++) {
            int32_t relativeStringOffset;
            // Search for this relative-day string at or after startIndex.
            if (fDates[n].string != NULL &&
                    (relativeStringOffset = modifiedText.indexOf(fDates[n].string, fDates[n].len, startIndex)) >= startIndex) {
                // it matched, replace the relative date with a real one for parsing
                UnicodeString dateString;
                // Work on a copy so the caller's calendar is not modified here.
                Calendar * tempCal = cal.clone();

                // Set the calendar to now+offset
                tempCal->add(UCAL_DATE,fDates[n].offset, status);
                if(U_FAILURE(status)) {
                    // NOTE(review): as shown, tempCal is deleted here and then
                    // used again below; presumably an early exit followed this
                    // delete in the full source -- verify.
                    delete tempCal;

                // Format the shifted date and splice it in place of the
                // relative-day string.
                fDateTimeFormatter->format(*tempCal, dateString, fPos);
                dateStart = relativeStringOffset;
                origDateLen = fDates[n].len;
                modDateLen = dateString.length();
                modifiedText.replace(dateStart, origDateLen, dateString);
                delete tempCal;
        // Build the combined pattern from the time and date patterns.
        UnicodeString combinedPattern;
        fCombinedFormat->format(fTimePattern, fDatePattern, combinedPattern, status);
        // NOTE(review): the parse of modifiedText with combinedPattern is not
        // visible in this excerpt.

        // Adjust offsets
        // Take whichever index is meaningful (parse index on success, error
        // index on failure) and translate it back to the original text.
        UBool noError = (pos.getErrorIndex() < 0);
        int32_t offset = (noError)? pos.getIndex(): pos.getErrorIndex();
        if (offset >= dateStart + modDateLen) {
            // offset at or after the end of the replaced text,
            // correct by the difference between original and replacement
            offset -= (modDateLen - origDateLen);
        } else if (offset >= dateStart) {
            // offset in the replaced text, set it to the beginning of that text
            // (i.e. the beginning of the relative day string)
            offset = dateStart;
        // NOTE(review): both branches below are empty in this excerpt;
        // presumably they store `offset` back into `pos` -- confirm.
        if (noError) {
        } else {
예제 #26
// Parses `text` for a numerator substitution. When `withZeros` is set, the
// text is first consumed piecewise with an upper bound of 1 ("zero or nothing"),
// then the remainder is handed to NFSubstitution::doParse() with lenient
// parsing forced off. The incoming lenientParse argument is ignored.
//
// NOTE(review): this excerpt appears to be missing lines (return type, braces,
// loop exits, and the statements that update zeroCount / pow / result);
// confirm against the full source.
NumeratorSubstitution::doParse(const UnicodeString& text, 
                               ParsePosition& parsePosition,
                               double baseValue,
                               double upperBound,
                               UBool /*lenientParse*/,
                               Formattable& result) const
    // we don't have to do anything special to do the parsing here,
    // but we have to turn lenient parsing off-- if we leave it on,
    // it SERIOUSLY messes up the algorithm

    // if withZeros is true, we need to count the zeros
    // and use that to adjust the parse result
    UErrorCode status = U_ZERO_ERROR;
    // NOTE(review): zeroCount is never incremented in the visible text.
    int32_t zeroCount = 0;
    UnicodeString workText(text);

    if (withZeros) {
        // Starts at a non-zero index so the loop condition below holds on
        // the first iteration.
        ParsePosition workPos(1);
        Formattable temp;

        while (workText.length() > 0 && workPos.getIndex() != 0) {
            getRuleSet()->parse(workText, workPos, 1, temp); // parse zero or nothing at all
            if (workPos.getIndex() == 0) {
                // we failed, either there were no more zeros, or the number was formatted with digits
                // either way, we're done

            // Advance the caller's position past what was consumed and drop
            // it from the working copy.
            parsePosition.setIndex(parsePosition.getIndex() + workPos.getIndex());
            workText.remove(0, workPos.getIndex());
            // Skip any gSpace characters, counting them as consumed too.
            while (workText.length() > 0 && workText.charAt(0) == gSpace) {
                workText.remove(0, 1);
                parsePosition.setIndex(parsePosition.getIndex() + 1);

        // Rebuild the working text from the original, minus everything
        // consumed so far.
        workText = text;
        workText.remove(0, (int32_t)parsePosition.getIndex());

    // we've parsed off the zeros, now let's parse the rest from our current position
    NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, FALSE, result);

    if (withZeros) {
        // any base value will do in this case.  is there a way to
        // force this to not bother trying all the base values?

        // compute the 'effective' base and prescale the value down
        int64_t n = result.getLong(status); // force conversion!
        int64_t d = 1;
        // NOTE(review): pow is not updated anywhere in the visible text.
        int32_t pow = 0;
        // Grow d by factors of ten until it exceeds n.
        while (d <= n) {
            d *= 10;
        // now add the zeros
        // NOTE(review): the zeroCount decrement is not visible in this excerpt.
        while (zeroCount > 0) {
            d *= 10;
        // d is now our true denominator

    return TRUE;
예제 #27
/**
 * Parses a string using the rule set or DecimalFormat belonging
 * to this substitution.  If there's a match, a mathematical
 * operation (the inverse of the one used in formatting) is
 * performed on the result of the parse and the value passed in
 * and returned as the result.  The parse position is updated to
 * point to the first unmatched character in the string.
 * @param text The string to parse
 * @param parsePosition On entry, ignored, but assumed to be 0.
 * On exit, this is updated to point to the first unmatched
 * character (or 0 if the substitution didn't match)
 * @param baseValue A partial parse result that should be
 * combined with the result of this parse
 * @param upperBound When searching the rule set for a rule
 * matching the string passed in, only rules with base values
 * lower than this are considered
 * @param lenientParse If true and matching against rules fails,
 * the substitution will also try matching the text against
 * numerals using a default-constructed NumberFormat.  If false,
 * no extra work is done.  (This value is false whenever the
 * formatter isn't in lenient-parse mode, but is also false
 * under some conditions even when the formatter _is_ in
 * lenient-parse mode.)
 * @return If there's a match, this is the result of composing
 * baseValue with whatever was returned from matching the
 * characters.  This will be either a Long or a Double.  If there's
 * no match this is new Long(0) (not null), and parsePosition
 * is left unchanged.
 */
// Parses `text` with this substitution's rule set (or, failing that, its
// number format), then composes the parsed value with `baseValue`.
// Returns TRUE when parsePosition has moved off index 0, FALSE otherwise.
//
// NOTE(review): this excerpt appears to be missing lines (return type, braces,
// and the statement that stores the composed value back into `result`);
// confirm against the full source.
NFSubstitution::doParse(const UnicodeString& text,
                        ParsePosition& parsePosition,
                        double baseValue,
                        double upperBound,
                        UBool lenientParse,
                        Formattable& result) const
    // NOTE(review): debug trace to stderr; it is unconditional as shown here --
    // presumably a debug-only preprocessor guard was lost. Verify.
    fprintf(stderr, "<nfsubs> %x bv: %g ub: %g\n", this, baseValue, upperBound);
    // figure out the highest base value a rule can have and match
    // the text being parsed (this varies according to the type of
    // substitutions: multiplier, modulus, and numerator substitutions
    // restrict the search to rules with base values lower than their
    // own; same-value substitutions leave the upper bound wherever
    // it was, and the others allow any rule to match
    upperBound = calcUpperBound(upperBound);

    // use our rule set to parse the text.  If that fails and
    // lenient parsing is enabled (this is always false if the
    // formatter's lenient-parsing mode is off, but it may also
    // be false even when the formatter's lenient-parse mode is
    // on), then also try parsing the text using a default-
    // constructed NumberFormat
    if (ruleSet != NULL) {
        ruleSet->parse(text, parsePosition, upperBound, result);
        // Fallback applies only when nothing was consumed and the rule set
        // is not a fraction rule set.
        if (lenientParse && !ruleSet->isFractionRuleSet() && parsePosition.getIndex() == 0) {
            UErrorCode status = U_ZERO_ERROR;
            NumberFormat* fmt = NumberFormat::createInstance(status);
            if (U_SUCCESS(status)) {
                fmt->parse(text, result, parsePosition);
            // The temporary formatter is owned here and released right away.
            delete fmt;

        // ...or use our DecimalFormat to parse the text
    } else if (numberFormat != NULL) {
        numberFormat->parse(text, result, parsePosition);

    // if the parse was successful, we've already advanced the caller's
    // parse position (this is the one function that doesn't have one
    // of its own).  Derive a parse result and return it as a Long,
    // if possible, or a Double
    if (parsePosition.getIndex() != 0) {
        UErrorCode status = U_ZERO_ERROR;
        double tempResult = result.getDouble(status);

        // composeRuleValue() produces a full parse result from
        // the partial parse result passed to this function from
        // the caller (this is either the owning rule's base value
        // or the partial result obtained from composing the
        // owning rule's base value with its other substitution's
        // parse result) and the partial parse result obtained by
        // matching the substitution (which will be the same value
        // the caller would get by parsing just this part of the
        // text with RuleBasedNumberFormat.parse() ).  How the two
        // values are used to derive the full parse result depends
        // on the types of substitutions: For a regular rule, the
        // ultimate result is its multiplier substitution's result
        // times the rule's divisor (or the rule's base value) plus
        // the modulus substitution's result (which will actually
        // supersede part of the rule's base value).  For a negative-
        // number rule, the result is the negative of its substitution's
        // result.  For a fraction rule, it's the sum of its two
        // substitution results.  For a rule in a fraction rule set,
        // it's the numerator substitution's result divided by
        // the rule's base value.  Results from same-value substitutions
        // propagate back upward, and null substitutions don't affect
        // the result.
        tempResult = composeRuleValue(tempResult, baseValue);
        // NOTE(review): tempResult is not written back to `result` in the
        // visible text; that statement is presumably missing here.
        return TRUE;
        // if the parse was UNsuccessful, return 0
    } else {
        return FALSE;
예제 #28
/**
 * JNI entry point: parses a Java string with a native number formatter.
 *
 * @param env      JNI environment.
 * @param clazz    Declaring Java class (unused).
 * @param addr     Native handle of the formatter, as stored on the Java side.
 * @param text     The Java string to parse.
 * @param position A java.text.ParsePosition; its index selects where parsing
 *                 starts. On success its index is set to the first unparsed
 *                 character; on a parse error its error index is set.
 * @param lenient  When true, lenient parsing is switched on for the duration
 *                 of this call and switched off again afterwards.
 * @return A java.lang.Double or java.lang.Long holding the parsed value, or
 *         NULL if the start index is out of range, the text does not parse,
 *         the parsed value has an unsupported type, or a JNI lookup fails
 *         (in which case a Java exception is left pending by the JNI call).
 */
static jobject parseRBNFImpl(JNIEnv *env, jclass clazz, jint addr, jstring text,
        jobject position, jboolean lenient) {

    // LOGI("ENTER parseRBNFImpl");

    const char * parsePositionClassName = "java/text/ParsePosition";
    const char * longClassName = "java/lang/Long";
    const char * doubleClassName = "java/lang/Double";

    UNumberFormat *fmt = (UNumberFormat *)(int)addr;

    // Resolve every class and method up front, before any resource that
    // needs explicit release is acquired, so these early returns are clean.
    jclass parsePositionClass = env->FindClass(parsePositionClassName);
    jclass longClass =  env->FindClass(longClassName);
    jclass doubleClass =  env->FindClass(doubleClassName);
    if (parsePositionClass == NULL || longClass == NULL || doubleClass == NULL) {
        return NULL;
    }

    jmethodID getIndexMethodID = env->GetMethodID(parsePositionClass,
            "getIndex", "()I");
    jmethodID setIndexMethodID = env->GetMethodID(parsePositionClass,
            "setIndex", "(I)V");
    jmethodID setErrorIndexMethodID = env->GetMethodID(parsePositionClass,
            "setErrorIndex", "(I)V");

    jmethodID longInitMethodID = env->GetMethodID(longClass, "<init>", "(J)V");
    jmethodID dblInitMethodID = env->GetMethodID(doubleClass, "<init>", "(D)V");
    if (getIndexMethodID == NULL || setIndexMethodID == NULL ||
            setErrorIndexMethodID == NULL || longInitMethodID == NULL ||
            dblInitMethodID == NULL) {
        return NULL;
    }

    int strlength = env->GetStringLength(text);
    int parsePos = env->CallIntMethod(position, getIndexMethodID);

    // make sure the ParsePosition is valid. Actually icu4c would parse a number
    // correctly even if the parsePosition is set to -1, but since the RI fails
    // for that case we have to fail too
    if(parsePos < 0 || parsePos > strlength) {
        return NULL;
    }

    // Pin the string characters only after validation: from here on every
    // exit path goes through ReleaseStringChars below.
    const jchar *str = env->GetStringChars(text, NULL);
    if (str == NULL) {
        return NULL;
    }

    Formattable res;

    const UnicodeString src((UChar*)str, strlength, strlength);
    ParsePosition pp;
    // Start parsing at the index the caller supplied.
    pp.setIndex(parsePos);

    if(lenient) {
        unum_setAttribute(fmt, UNUM_LENIENT_PARSE, JNI_TRUE);
    }

    ((const NumberFormat*)fmt)->parse(src, res, pp);

    if(lenient) {
        unum_setAttribute(fmt, UNUM_LENIENT_PARSE, JNI_FALSE);
    }

    env->ReleaseStringChars(text, str);

    if(pp.getErrorIndex() == -1) {
        parsePos = pp.getIndex();
    } else {
        env->CallVoidMethod(position, setErrorIndexMethodID,
                (jint) pp.getErrorIndex());
        return NULL;
    }

    // Box the result according to the type ICU produced. The Java-side
    // index is only updated when a value is actually returned.
    switch(res.getType()) {
        case Formattable::kDouble: {
            double resultDouble = res.getDouble();
            env->CallVoidMethod(position, setIndexMethodID, (jint) parsePos);
            return env->NewObject(doubleClass, dblInitMethodID,
                    (jdouble) resultDouble);
        }
        case Formattable::kLong: {
            long resultLong = res.getLong();
            env->CallVoidMethod(position, setIndexMethodID, (jint) parsePos);
            return env->NewObject(longClass, longInitMethodID,
                    (jlong) resultLong);
        }
        case Formattable::kInt64: {
            int64_t resultInt64 = res.getInt64();
            env->CallVoidMethod(position, setIndexMethodID, (jint) parsePos);
            return env->NewObject(longClass, longInitMethodID,
                    (jlong) resultInt64);
        }
        default:
            break;
    }

    return NULL;
}
예제 #29
// Parses `source` as a time-unit amount by trying every stored pattern
// (per time unit, per plural count, per style) and keeping the candidate
// that consumes the most text from pos.getIndex().
//
// NOTE(review): this excerpt appears to be missing lines (return type, braces,
// #endif lines, loop `continue`s, a comment terminator, and the statements
// that publish the result); confirm against the full source.
TimeUnitFormat::parseObject(const UnicodeString& source,
                            Formattable& result,
                            ParsePosition& pos) const {
    Formattable resultNumber(0.0);
    UBool withNumberFormat = false;
    TimeUnit::UTimeUnitFields resultTimeUnit = TimeUnit::UTIMEUNIT_FIELD_COUNT;
    // State for the best (longest) match found so far.
    int32_t oldPos = pos.getIndex();
    int32_t newPos = -1;
    int32_t longestParseDistance = 0;
    UnicodeString* countOfLongestMatch = NULL;
    // NOTE(review): debug output; it is unconditional as shown, presumably a
    // debug guard was lost. `res` is a fixed 1000-byte buffer and no capacity
    // is passed to extract() -- verify it cannot overflow.
    char res[1000];
    source.extract(0, source.length(), res, "UTF-8");
    std::cout << "parse source: " << res << "\n";
    // parse by iterating through all available patterns
    // and looking for the longest match.
    for (TimeUnit::UTimeUnitFields i = TimeUnit::UTIMEUNIT_YEAR;
         i < TimeUnit::UTIMEUNIT_FIELD_COUNT;
         i = (TimeUnit::UTimeUnitFields)(i+1)) {
        // Walk every (plural count -> patterns) entry for this time unit.
        Hashtable* countToPatterns = fTimeUnitToCountToPatterns[i];
        int32_t elemPos = UHASH_FIRST;
        const UHashElement* elem = NULL;
        while ((elem = countToPatterns->nextElement(elemPos)) != NULL){
            const UHashTok keyTok = elem->key;
            UnicodeString* count = (UnicodeString*)keyTok.pointer;
            count->extract(0, count->length(), res, "UTF-8");
            std::cout << "parse plural count: " << res << "\n";
            const UHashTok valueTok = elem->value;
            // the value is a pair of MessageFormat*
            MessageFormat** patterns = (MessageFormat**)valueTok.pointer;
            for (UTimeUnitFormatStyle style = UTMUTFMT_FULL_STYLE; style < UTMUTFMT_FORMAT_STYLE_COUNT;
                 style = (UTimeUnitFormatStyle)(style + 1)) {
                MessageFormat* pattern = patterns[style];
                // see if we can parse
                Formattable parsed;
                pattern->parseObject(source, parsed, pos);
                // Error reported or nothing consumed: this pattern did not match.
                // NOTE(review): the body of this branch is not visible here.
                if (pos.getErrorIndex() != -1 || pos.getIndex() == oldPos) {
    #ifdef TMUTFMT_DEBUG
                std::cout << "parsed.getType: " << parsed.getType() << "\n";
                Formattable tmpNumber(0.0);
                if (pattern->getArgTypeCount() != 0) {
                    // The pattern has an argument: take the first parsed
                    // element as the number, converting from string if needed.
                    Formattable& temp = parsed[0];
                    if (temp.getType() == Formattable::kString) {
                        UnicodeString tmpString;
                        UErrorCode pStatus = U_ZERO_ERROR;
                        getNumberFormat().parse(temp.getString(tmpString), tmpNumber, pStatus);
                        if (U_FAILURE(pStatus)) {
                    } else if (temp.isNumeric()) {
                        tmpNumber = temp;
                    } else {
                // Keep this candidate only if it consumed more text than the
                // best one so far.
                int32_t parseDistance = pos.getIndex() - oldPos;
                if (parseDistance > longestParseDistance) {
                    if (pattern->getArgTypeCount() != 0) {
                        resultNumber = tmpNumber;
                        withNumberFormat = true;
                    } else {
                        withNumberFormat = false;
                    resultTimeUnit = i;
                    newPos = pos.getIndex();
                    longestParseDistance = parseDistance;
                    countOfLongestMatch = count;
    /* After find the longest match, parse the number.
     * Result number could be null for the pattern without number pattern.
     * such as unit pattern in Arabic.
     * When result number is null, use plural rule to set the number.
    if (withNumberFormat == false && longestParseDistance != 0) {
        // set the number using plural count
        if (0 == countOfLongestMatch->compare(PLURAL_COUNT_ZERO, 4)) {
            resultNumber = Formattable(0.0);
        } else if (0 == countOfLongestMatch->compare(PLURAL_COUNT_ONE, 3)) {
            resultNumber = Formattable(1.0);
        } else if (0 == countOfLongestMatch->compare(PLURAL_COUNT_TWO, 3)) {
            resultNumber = Formattable(2.0);
        } else {
            // should not happen.
            // TODO: how to handle?
            resultNumber = Formattable(3.0);
    if (longestParseDistance == 0) {
    } else {
        UErrorCode status = U_ZERO_ERROR;
        TimeUnitAmount* tmutamt = new TimeUnitAmount(resultNumber, resultTimeUnit, status);
        if (U_SUCCESS(status)) {
        } else {
예제 #30
TimeUnitFormat::parseObject(const UnicodeString& source, 
                            Formattable& result,
                            ParsePosition& pos) const {
    double resultNumber = -1; 
    UBool withNumberFormat = false;
    TimeUnit::UTimeUnitFields resultTimeUnit = TimeUnit::UTIMEUNIT_FIELD_COUNT;
    int32_t oldPos = pos.getIndex();
    int32_t newPos = -1;
    int32_t longestParseDistance = 0;
    UnicodeString* countOfLongestMatch = NULL;
    char res[1000];
    source.extract(0, source.length(), res, "UTF-8");
    std::cout << "parse source: " << res << "\n";           
    // parse by iterating through all available patterns
    // and looking for the longest match.
    for (TimeUnit::UTimeUnitFields i = TimeUnit::UTIMEUNIT_YEAR;
         i < TimeUnit::UTIMEUNIT_FIELD_COUNT;
         i = (TimeUnit::UTimeUnitFields)(i+1)) {
        Hashtable* countToPatterns = fTimeUnitToCountToPatterns[i];
        int32_t elemPos = -1;
        const UHashElement* elem = NULL;
        while ((elem = countToPatterns->nextElement(elemPos)) != NULL){
            const UHashTok keyTok = elem->key;
            UnicodeString* count = (UnicodeString*)keyTok.pointer;
            count->extract(0, count->length(), res, "UTF-8");
            std::cout << "parse plural count: " << res << "\n";           
            const UHashTok valueTok = elem->value;
            // the value is a pair of MessageFormat*
            MessageFormat** patterns = (MessageFormat**)valueTok.pointer;
            for (EStyle style = kFull; style < kTotal; style = (EStyle)(style + 1)) {
                MessageFormat* pattern = patterns[style];
                // see if we can parse
                Formattable parsed;
                pattern->parseObject(source, parsed, pos);
                if (pos.getErrorIndex() != -1 || pos.getIndex() == oldPos) {
    #ifdef TMUTFMT_DEBUG
                std::cout << "parsed.getType: " << parsed.getType() << "\n";
                double tmpNumber = 0;
                if (pattern->getArgTypeCount() != 0) {
                    // pattern with Number as beginning, such as "{0} d".
                    // check to make sure that the timeUnit is consistent
                    Formattable& temp = parsed[0];
                    if (temp.getType() == Formattable::kDouble) {
                        tmpNumber = temp.getDouble();
                    } else if (temp.getType() == Formattable::kLong) {
                        tmpNumber = temp.getLong();
                    } else {
                    UnicodeString select = fPluralRules->select(tmpNumber);
    #ifdef TMUTFMT_DEBUG
                    select.extract(0, select.length(), res, "UTF-8");
                    std::cout << "parse plural select count: " << res << "\n"; 
                    if (*count != select) {
                int32_t parseDistance = pos.getIndex() - oldPos;
                if (parseDistance > longestParseDistance) {
                    if (pattern->getArgTypeCount() != 0) {
                        resultNumber = tmpNumber;
                        withNumberFormat = true;
                    } else {
                        withNumberFormat = false;
                    resultTimeUnit = i;
                    newPos = pos.getIndex();
                    longestParseDistance = parseDistance;
                    countOfLongestMatch = count;
    /* After find the longest match, parse the number.
     * Result number could be null for the pattern without number pattern.
     * such as unit pattern in Arabic.
     * When result number is null, use plural rule to set the number.
    if (withNumberFormat == false && longestParseDistance != 0) {
        // set the number using plurrual count
        if ( *countOfLongestMatch == PLURAL_COUNT_ZERO ) {
            resultNumber = 0;
        } else if ( *countOfLongestMatch == PLURAL_COUNT_ONE ) {
            resultNumber = 1;
        } else if ( *countOfLongestMatch == PLURAL_COUNT_TWO ) {
            resultNumber = 2;
        } else {
            // should not happen.
            // TODO: how to handle?
            resultNumber = 3;
    if (longestParseDistance == 0) {
    } else {
        UErrorCode status = U_ZERO_ERROR;
        TimeUnitAmount* tmutamt = new TimeUnitAmount(resultNumber, resultTimeUnit, status);
        if (U_SUCCESS(status)) {
        } else {