예제 #1
0
void
RuleBasedNumberFormat::parse(const UnicodeString& text,
                             Formattable& result,
                             ParsePosition& parsePosition) const
{
    //TODO: We need a real fix.  See #6895 / #6896
    if (noParse) {
        // skip parsing
        parsePosition.setErrorIndex(0);
        return;
    }

    if (!ruleSets) {
        parsePosition.setErrorIndex(0);
        return;
    }

    UnicodeString workingText(text, parsePosition.getIndex());
    ParsePosition workingPos(0);

    ParsePosition high_pp(0);
    Formattable high_result;

    for (NFRuleSet** p = ruleSets; *p; ++p) {
        NFRuleSet *rp = *p;
        if (rp->isPublic() && rp->isParseable()) {
            ParsePosition working_pp(0);
            Formattable working_result;

            rp->parse(workingText, working_pp, kMaxDouble, working_result);
            if (working_pp.getIndex() > high_pp.getIndex()) {
                high_pp = working_pp;
                high_result = working_result;

                if (high_pp.getIndex() == workingText.length()) {
                    break;
                }
            }
        }
    }

    int32_t startIndex = parsePosition.getIndex();
    parsePosition.setIndex(startIndex + high_pp.getIndex());
    if (high_pp.getIndex() > 0) {
        parsePosition.setErrorIndex(-1);
    } else {
        int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
        parsePosition.setErrorIndex(startIndex + errorIndex);
    }
    result = high_result;
    if (result.getType() == Formattable::kDouble) {
        int32_t r = (int32_t)result.getDouble();
        if ((double)r == result.getDouble()) {
            result.setLong(r);
        }
    }
}
예제 #2
0
UBool
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
{
    // try matching each rule in the rule set against the text being
    // parsed.  Whichever one matches the most characters is the one
    // that determines the value we return.

    result.setLong(0);

    // dump out if there's no text to parse
    if (text.length() == 0) {
        return 0;
    }

    ParsePosition highWaterMark;
    ParsePosition workingPos = pos;

#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> %x '", this);
    dumpUS(stderr, name);
    fprintf(stderr, "' text '");
    dumpUS(stderr, text);
    fprintf(stderr, "'\n");
    fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
#endif

    // start by trying the negative number rule (if there is one)
    if (negativeNumberRule) {
        Formattable tempResult;
#ifdef RBNF_DEBUG
        fprintf(stderr, "  <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound);
#endif
        UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
#ifdef RBNF_DEBUG
        fprintf(stderr, "  <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
#endif
        if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
            result = tempResult;
            highWaterMark = workingPos;
        }
        workingPos = pos;
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> continue fractional with text '");
    dumpUS(stderr, text);
    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
#endif
    // then try each of the fraction rules
    {
        for (int i = 0; i < 3; i++) {
            if (fractionRules[i]) {
                Formattable tempResult;
                UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
                if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
                    result = tempResult;
                    highWaterMark = workingPos;
                }
                workingPos = pos;
            }
        }
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> continue other with text '");
    dumpUS(stderr, text);
    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
#endif

    // finally, go through the regular rules one at a time.  We start
    // at the end of the list because we want to try matching the most
    // sigificant rule first (this helps ensure that we parse
    // "five thousand three hundred six" as
    // "(five thousand) (three hundred) (six)" rather than
    // "((five thousand three) hundred) (six)").  Skip rules whose
    // base values are higher than the upper bound (again, this helps
    // limit ambiguity by making sure the rules that match a rule's
    // are less significant than the rule containing the substitutions)/
    {
        int64_t ub = util64_fromDouble(upperBound);
#ifdef RBNF_DEBUG
        {
            char ubstr[64];
            util64_toa(ub, ubstr, 64);
            char ubstrhex[64];
            util64_toa(ub, ubstrhex, 64, 16);
            fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
        }
#endif
        for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
            if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
                continue;
            }
            Formattable tempResult;
            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
            if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
                result = tempResult;
                highWaterMark = workingPos;
            }
            workingPos = pos;
        }
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> exit\n");
#endif
    // finally, update the parse postion we were passed to point to the
    // first character we didn't use, and return the result that
    // corresponds to that string of characters
    pos = highWaterMark;

    return 1;
}
예제 #3
0
void
PluralFormatTest::pluralFormatExtendedTest(void) {
  const char *targets[] = {
    "There are no widgets.",
    "There is one widget.",
    "There is a bling widget and one other widget.",
    "There is a bling widget and 2 other widgets.",
    "There is a bling widget and 3 other widgets.",
    "Widgets, five (5-1=4) there be.",
    "There is a bling widget and 5 other widgets.",
    "There is a bling widget and 6 other widgets.",
  };

  const char* fmt =
      "offset:1.0 "
      "=0 {There are no widgets.} "
      "=1.0 {There is one widget.} "
      "=5 {Widgets, five (5-1=#) there be.} "
      "one {There is a bling widget and one other widget.} "
      "other {There is a bling widget and # other widgets.}";

  UErrorCode status = U_ZERO_ERROR;
  UnicodeString fmtString(fmt, -1, US_INV);
  PluralFormat pf(Locale::getEnglish(), fmtString, status);
  MessageFormat mf(UNICODE_STRING_SIMPLE("{0,plural,").append(fmtString).append((UChar)0x7d /* '}' */),
                   Locale::getEnglish(), status);
  Formattable args;
  FieldPosition ignore;
  if (U_FAILURE(status)) {
    dataerrln("Failed to apply pattern - %s", u_errorName(status));
    return;
  }
  for (int32_t i = 0; i < 7; ++i) {
    UnicodeString result = pf.format(i, status);
    if (U_FAILURE(status)) {
      errln("PluralFormat.format(value %d) failed - %s", i, u_errorName(status));
      return;
    }
    UnicodeString expected(targets[i], -1, US_INV);
    if (expected != result) {
      UnicodeString message("PluralFormat.format(): Expected '", -1, US_INV);
      message.append(expected);
      message.append(UnicodeString("' but got '", -1, US_INV));
      message.append(result);
      message.append("'", -1, US_INV);
      errln(message);
    }
    args.setLong(i);
    mf.format(&args, 1, result.remove(), ignore, status);
    if (U_FAILURE(status)) {
      errln("MessageFormat.format(value %d) failed - %s", i, u_errorName(status));
      return;
    }
    if (expected != result) {
      UnicodeString message("MessageFormat.format(): Expected '", -1, US_INV);
      message.append(expected);
      message.append(UnicodeString("' but got '", -1, US_INV));
      message.append(result);
      message.append("'", -1, US_INV);
      errln(message);
    }
  }
}
예제 #4
0
/**
 * Parses a string using the rule set or DecimalFormat belonging
 * to this substitution.  If there's a match, a mathematical
 * operation (the inverse of the one used in formatting) is
 * performed on the result of the parse and the value passed in
 * and returned as the result.  The parse position is updated to
 * point to the first unmatched character in the string.
 * @param text The string to parse
 * @param parsePosition On entry, ignored, but assumed to be 0.
 * On exit, this is updated to point to the first unmatched
 * character (or 0 if the substitution didn't match)
 * @param baseValue A partial parse result that should be
 * combined with the result of this parse
 * @param upperBound When searching the rule set for a rule
 * matching the string passed in, only rules with base values
 * lower than this are considered
 * @param lenientParse If true and matching against rules fails,
 * the substitution will also try matching the text against
 * numerals using a default-costructed NumberFormat.  If false,
 * no extra work is done.  (This value is false whenever the
 * formatter isn't in lenient-parse mode, but is also false
 * under some conditions even when the formatter _is_ in
 * lenient-parse mode.)
 * @return If there's a match, this is the result of composing
 * baseValue with whatever was returned from matching the
 * characters.  This will be either a Long or a Double.  If there's
 * no match this is new Long(0) (not null), and parsePosition
 * is left unchanged.
 */
UBool
NFSubstitution::doParse(const UnicodeString& text,
                        ParsePosition& parsePosition,
                        double baseValue,
                        double upperBound,
                        UBool lenientParse,
                        Formattable& result) const
{
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfsubs> %x bv: %g ub: %g\n", this, baseValue, upperBound);
#endif
    // figure out the highest base value a rule can have and match
    // the text being parsed (this varies according to the type of
    // substitutions: multiplier, modulus, and numerator substitutions
    // restrict the search to rules with base values lower than their
    // own; same-value substitutions leave the upper bound wherever
    // it was, and the others allow any rule to match
    upperBound = calcUpperBound(upperBound);

    // use our rule set to parse the text.  If that fails and
    // lenient parsing is enabled (this is always false if the
    // formatter's lenient-parsing mode is off, but it may also
    // be false even when the formatter's lenient-parse mode is
    // on), then also try parsing the text using a default-
    // constructed NumberFormat
    if (ruleSet != NULL) {
        ruleSet->parse(text, parsePosition, upperBound, result);
        if (lenientParse && !ruleSet->isFractionRuleSet() && parsePosition.getIndex() == 0) {
            UErrorCode status = U_ZERO_ERROR;
            NumberFormat* fmt = NumberFormat::createInstance(status);
            if (U_SUCCESS(status)) {
                fmt->parse(text, result, parsePosition);
            }
            delete fmt;
        }

        // ...or use our DecimalFormat to parse the text
    } else if (numberFormat != NULL) {
        numberFormat->parse(text, result, parsePosition);
    }

    // if the parse was successful, we've already advanced the caller's
    // parse position (this is the one function that doesn't have one
    // of its own).  Derive a parse result and return it as a Long,
    // if possible, or a Double
    if (parsePosition.getIndex() != 0) {
        UErrorCode status = U_ZERO_ERROR;
        double tempResult = result.getDouble(status);

        // composeRuleValue() produces a full parse result from
        // the partial parse result passed to this function from
        // the caller (this is either the owning rule's base value
        // or the partial result obtained from composing the
        // owning rule's base value with its other substitution's
        // parse result) and the partial parse result obtained by
        // matching the substitution (which will be the same value
        // the caller would get by parsing just this part of the
        // text with RuleBasedNumberFormat.parse() ).  How the two
        // values are used to derive the full parse result depends
        // on the types of substitutions: For a regular rule, the
        // ultimate result is its multiplier substitution's result
        // times the rule's divisor (or the rule's base value) plus
        // the modulus substitution's result (which will actually
        // supersede part of the rule's base value).  For a negative-
        // number rule, the result is the negative of its substitution's
        // result.  For a fraction rule, it's the sum of its two
        // substitution results.  For a rule in a fraction rule set,
        // it's the numerator substitution's result divided by
        // the rule's base value.  Results from same-value substitutions
        // propagate back upard, and null substitutions don't affect
        // the result.
        tempResult = composeRuleValue(tempResult, baseValue);
        result.setDouble(tempResult);
        return TRUE;
        // if the parse was UNsuccessful, return 0
    } else {
        result.setLong(0);
        return FALSE;
    }
}