void RuleBasedNumberFormat::parse(const UnicodeString& text, Formattable& result, ParsePosition& parsePosition) const { //TODO: We need a real fix. See #6895 / #6896 if (noParse) { // skip parsing parsePosition.setErrorIndex(0); return; } if (!ruleSets) { parsePosition.setErrorIndex(0); return; } UnicodeString workingText(text, parsePosition.getIndex()); ParsePosition workingPos(0); ParsePosition high_pp(0); Formattable high_result; for (NFRuleSet** p = ruleSets; *p; ++p) { NFRuleSet *rp = *p; if (rp->isPublic() && rp->isParseable()) { ParsePosition working_pp(0); Formattable working_result; rp->parse(workingText, working_pp, kMaxDouble, working_result); if (working_pp.getIndex() > high_pp.getIndex()) { high_pp = working_pp; high_result = working_result; if (high_pp.getIndex() == workingText.length()) { break; } } } } int32_t startIndex = parsePosition.getIndex(); parsePosition.setIndex(startIndex + high_pp.getIndex()); if (high_pp.getIndex() > 0) { parsePosition.setErrorIndex(-1); } else { int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0; parsePosition.setErrorIndex(startIndex + errorIndex); } result = high_result; if (result.getType() == Formattable::kDouble) { int32_t r = (int32_t)result.getDouble(); if ((double)r == result.getDouble()) { result.setLong(r); } } }
/**
 * Tries every rule in this rule set against the text and reports the value
 * produced by whichever rule matched the most characters.
 *
 * @param text        The text to parse.
 * @param pos         On entry, the position each rule starts parsing from.
 *                    On exit (for non-empty text), the high-water mark: the
 *                    position reached by the best-matching rule, or a
 *                    default-constructed ParsePosition if no rule matched.
 *                    Left untouched when text is empty.
 * @param upperBound  Passed through to each rule's doParse(). For rule sets
 *                    that are not fraction rule sets, regular rules whose
 *                    base value is greater than or equal to this bound are
 *                    skipped.
 * @param result      Set to 0 (long) up front, then overwritten with the
 *                    result of the best-matching rule, if any.
 * @return 0 if text is empty, 1 otherwise (whether or not a rule matched).
 */
UBool
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
{
    // try matching each rule in the rule set against the text being
    // parsed.  Whichever one matches the most characters is the one
    // that determines the value we return.

    result.setLong(0);

    // dump out if there's no text to parse
    if (text.length() == 0) {
        return 0;
    }

    ParsePosition highWaterMark;
    ParsePosition workingPos = pos;

#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> %p '", static_cast<const void*>(this));
    dumpUS(stderr, name);
    fprintf(stderr, "' text '");
    dumpUS(stderr, text);
    fprintf(stderr, "'\n");
    fprintf(stderr, " parse negative: %d\n", negativeNumberRule != 0);
#endif

    // start by trying the negative number rule (if there is one)
    if (negativeNumberRule) {
        Formattable tempResult;
#ifdef RBNF_DEBUG
        fprintf(stderr, " <nfrs before negative> %p ub: %g\n",
                static_cast<const void*>(negativeNumberRule), upperBound);
#endif
        UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
#ifdef RBNF_DEBUG
        fprintf(stderr, " <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
#endif
        if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
            result = tempResult;
            highWaterMark = workingPos;
        }
        // every rule starts from the caller's original position
        workingPos = pos;
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> continue fractional with text '");
    dumpUS(stderr, text);
    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
#endif

    // then try each of the fraction rules
    {
        for (int i = 0; i < 3; i++) {
            if (fractionRules[i]) {
                Formattable tempResult;
                UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
                if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
                    result = tempResult;
                    highWaterMark = workingPos;
                }
                workingPos = pos;
            }
        }
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> continue other with text '");
    dumpUS(stderr, text);
    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
#endif

    // finally, go through the regular rules one at a time.  We start
    // at the end of the list because we want to try matching the most
    // significant rule first (this helps ensure that we parse
    // "five thousand three hundred six" as
    // "(five thousand) (three hundred) (six)" rather than
    // "((five thousand three) hundred) (six)").  Skip rules whose
    // base values are higher than the upper bound (again, this helps
    // limit ambiguity by making sure the rules that match a rule's
    // substitutions are less significant than the rule containing
    // the substitutions).
    {
        int64_t ub = util64_fromDouble(upperBound);
#ifdef RBNF_DEBUG
        {
            char ubstr[64];
            util64_toa(ub, ubstr, 64);
            char ubstrhex[64];
            util64_toa(ub, ubstrhex, 64, 16);
            fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
        }
#endif
        // stop early once some rule has consumed the entire text
        for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
            if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
                continue;
            }
            Formattable tempResult;
            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
            if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
                result = tempResult;
                highWaterMark = workingPos;
            }
            workingPos = pos;
        }
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> exit\n");
#endif

    // finally, update the parse position we were passed to point to the
    // first character we didn't use, and return the result that
    // corresponds to that string of characters
    pos = highWaterMark;

    return 1;
}
void PluralFormatTest::pluralFormatExtendedTest(void) { const char *targets[] = { "There are no widgets.", "There is one widget.", "There is a bling widget and one other widget.", "There is a bling widget and 2 other widgets.", "There is a bling widget and 3 other widgets.", "Widgets, five (5-1=4) there be.", "There is a bling widget and 5 other widgets.", "There is a bling widget and 6 other widgets.", }; const char* fmt = "offset:1.0 " "=0 {There are no widgets.} " "=1.0 {There is one widget.} " "=5 {Widgets, five (5-1=#) there be.} " "one {There is a bling widget and one other widget.} " "other {There is a bling widget and # other widgets.}"; UErrorCode status = U_ZERO_ERROR; UnicodeString fmtString(fmt, -1, US_INV); PluralFormat pf(Locale::getEnglish(), fmtString, status); MessageFormat mf(UNICODE_STRING_SIMPLE("{0,plural,").append(fmtString).append((UChar)0x7d /* '}' */), Locale::getEnglish(), status); Formattable args; FieldPosition ignore; if (U_FAILURE(status)) { dataerrln("Failed to apply pattern - %s", u_errorName(status)); return; } for (int32_t i = 0; i < 7; ++i) { UnicodeString result = pf.format(i, status); if (U_FAILURE(status)) { errln("PluralFormat.format(value %d) failed - %s", i, u_errorName(status)); return; } UnicodeString expected(targets[i], -1, US_INV); if (expected != result) { UnicodeString message("PluralFormat.format(): Expected '", -1, US_INV); message.append(expected); message.append(UnicodeString("' but got '", -1, US_INV)); message.append(result); message.append("'", -1, US_INV); errln(message); } args.setLong(i); mf.format(&args, 1, result.remove(), ignore, status); if (U_FAILURE(status)) { errln("MessageFormat.format(value %d) failed - %s", i, u_errorName(status)); return; } if (expected != result) { UnicodeString message("MessageFormat.format(): Expected '", -1, US_INV); message.append(expected); message.append(UnicodeString("' but got '", -1, US_INV)); message.append(result); message.append("'", -1, US_INV); errln(message); } } }
/**
 * Matches text against this substitution's rule set (or, when there is no
 * rule set, its DecimalFormat) and combines the matched value with
 * baseValue using the inverse of the operation used when formatting.
 *
 * @param text           The string to parse.
 * @param parsePosition  On entry, ignored, but assumed to be 0. On exit,
 *                       points to the first unmatched character; an index
 *                       of 0 means the substitution did not match.
 * @param baseValue      A partial parse result to be combined with the
 *                       value matched here (via composeRuleValue()).
 * @param upperBound     Limits which rules may match: after adjustment by
 *                       calcUpperBound(), it is handed to the rule set's
 *                       parse().
 * @param lenientParse   If true, and the rule set is not a fraction rule
 *                       set and matched nothing, the text is also tried
 *                       against a default-constructed NumberFormat. (This is
 *                       always false when the formatter is not in
 *                       lenient-parse mode, and may be false even when it
 *                       is.)
 * @param result         On a match, set to the composed value as a double.
 *                       On no match, set to 0 (long).
 * @return TRUE if parsePosition's index is non-zero after parsing (a
 *         match), FALSE otherwise.
 */
UBool
NFSubstitution::doParse(const UnicodeString& text,
                        ParsePosition& parsePosition,
                        double baseValue,
                        double upperBound,
                        UBool lenientParse,
                        Formattable& result) const
{
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfsubs> %x bv: %g ub: %g\n", this, baseValue, upperBound);
#endif
    // Work out the highest base value a rule may have and still be allowed
    // to match. This depends on the substitution type: multiplier, modulus
    // and numerator substitutions restrict the search to rules with base
    // values below their own, same-value substitutions keep the bound as
    // given, and the remaining kinds let any rule match.
    upperBound = calcUpperBound(upperBound);

    if (ruleSet != NULL) {
        // Primary path: let our rule set parse the text.
        ruleSet->parse(text, parsePosition, upperBound, result);

        // Lenient fallback: nothing matched, so also try plain numerals
        // with a default-constructed NumberFormat.
        const UBool tryPlainNumerals =
            lenientParse && !ruleSet->isFractionRuleSet() && parsePosition.getIndex() == 0;
        if (tryPlainNumerals) {
            UErrorCode createStatus = U_ZERO_ERROR;
            NumberFormat* plainFormat = NumberFormat::createInstance(createStatus);
            if (U_SUCCESS(createStatus)) {
                plainFormat->parse(text, result, parsePosition);
            }
            delete plainFormat;
        }
    } else if (numberFormat != NULL) {
        // No rule set: use our DecimalFormat instead.
        numberFormat->parse(text, result, parsePosition);
    }

    // The parsers above advance the caller's parse position directly (this
    // function has none of its own), so an index of 0 means no match.
    if (parsePosition.getIndex() == 0) {
        result.setLong(0);
        return FALSE;
    }

    // composeRuleValue() builds the full parse result from the partial
    // result supplied by the caller (the owning rule's base value, or that
    // base value already composed with the rule's other substitution) and
    // the value matched by this substitution. How the two are combined
    // depends on the substitution type:
    //   - regular rule: the multiplier substitution's result times the
    //     rule's divisor (or base value), plus the modulus substitution's
    //     result (which supersedes part of the rule's base value);
    //   - negative-number rule: the negative of the substitution's result;
    //   - fraction rule: the sum of the two substitution results;
    //   - rule in a fraction rule set: the numerator substitution's result
    //     divided by the rule's base value;
    //   - same-value substitutions propagate the result upward unchanged,
    //     and null substitutions do not affect it.
    UErrorCode valueStatus = U_ZERO_ERROR;
    const double matchedValue = result.getDouble(valueStatus);
    result.setDouble(composeRuleValue(matchedValue, baseValue));
    return TRUE;
}