Esempio n. 1
0
bool CodePointMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const {
    if (segment.startsWith(fCp)) {
        segment.adjustOffsetByCodePoint();
        result.setCharsConsumed(segment);
    }
    return false;
}
Esempio n. 2
0
bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign,
                           UErrorCode&) const {
    if (result.seenNumber() && exponentSign == 0) {
        // A number has already been consumed.
        return false;
    } else if (exponentSign != 0) {
        // scientific notation always comes after the number
        U_ASSERT(!result.quantity.bogus);
    }

    // Initial offset before any character consumption.
    int32_t initialOffset = segment.getOffset();

    // Return value: whether to ask for more characters.
    bool maybeMore = false;

    // All digits consumed so far.
    number::impl::DecimalQuantity digitsConsumed;
    digitsConsumed.bogus = true;

    // The total number of digits after the decimal place, used for scaling the result.
    int32_t digitsAfterDecimalPlace = 0;

    // The actual grouping and decimal separators used in the string.
    // If non-null, we have seen that token.
    UnicodeString actualGroupingString;
    UnicodeString actualDecimalString;
    actualGroupingString.setToBogus();
    actualDecimalString.setToBogus();

    // Information for two groups: the previous group and the current group.
    //
    // Each group has three pieces of information:
    //
    // Offset: the string position of the beginning of the group, including a leading separator
    // if there was a leading separator. This is needed in case we need to rewind the parse to
    // that position.
    //
    // Separator type:
    // 0 => beginning of string
    // 1 => lead separator is a grouping separator
    // 2 => lead separator is a decimal separator
    //
    // Count: the number of digits in the group. If -1, the group has been validated.
    int32_t currGroupOffset = 0;
    int32_t currGroupSepType = 0;
    int32_t currGroupCount = 0;
    int32_t prevGroupOffset = -1;
    int32_t prevGroupSepType = -1;
    int32_t prevGroupCount = -1;

    while (segment.length() > 0) {
        maybeMore = false;

        // Attempt to match a digit.
        int8_t digit = -1;

        // Try by code point digit value.
        UChar32 cp = segment.getCodePoint();
        if (u_isdigit(cp)) {
            segment.adjustOffset(U16_LENGTH(cp));
            digit = static_cast<int8_t>(u_digit(cp, 10));
        }

        // Try by digit string.
        if (digit == -1 && !fLocalDigitStrings.isNull()) {
            for (int32_t i = 0; i < 10; i++) {
                const UnicodeString& str = fLocalDigitStrings[i];
                if (str.isEmpty()) {
                    continue;
                }
                int32_t overlap = segment.getCommonPrefixLength(str);
                if (overlap == str.length()) {
                    segment.adjustOffset(overlap);
                    digit = static_cast<int8_t>(i);
                    break;
                }
                maybeMore = maybeMore || (overlap == segment.length());
            }
        }

        if (digit >= 0) {
            // Digit was found.
            if (digitsConsumed.bogus) {
                digitsConsumed.bogus = false;
                digitsConsumed.clear();
            }
            digitsConsumed.appendDigit(digit, 0, true);
            currGroupCount++;
            if (!actualDecimalString.isBogus()) {
                digitsAfterDecimalPlace++;
            }
            continue;
        }

        // Attempt to match a literal grouping or decimal separator.
        bool isDecimal = false;
        bool isGrouping = false;

        // 1) Attempt the decimal separator string literal.
        // if (we have not seen a decimal separator yet) { ... }
        if (actualDecimalString.isBogus() && !decimalSeparator.isEmpty()) {
            int32_t overlap = segment.getCommonPrefixLength(decimalSeparator);
            maybeMore = maybeMore || (overlap == segment.length());
            if (overlap == decimalSeparator.length()) {
                isDecimal = true;
                actualDecimalString = decimalSeparator;
            }
        }

        // 2) Attempt to match the actual grouping string literal.
        if (!actualGroupingString.isBogus()) {
            int32_t overlap = segment.getCommonPrefixLength(actualGroupingString);
            maybeMore = maybeMore || (overlap == segment.length());
            if (overlap == actualGroupingString.length()) {
                isGrouping = true;
            }
        }

        // 2.5) Attempt to match a new the grouping separator string literal.
        // if (we have not seen a grouping or decimal separator yet) { ... }
        if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus() &&
            !groupingSeparator.isEmpty()) {
            int32_t overlap = segment.getCommonPrefixLength(groupingSeparator);
            maybeMore = maybeMore || (overlap == segment.length());
            if (overlap == groupingSeparator.length()) {
                isGrouping = true;
                actualGroupingString = groupingSeparator;
            }
        }

        // 3) Attempt to match a decimal separator from the equivalence set.
        // if (we have not seen a decimal separator yet) { ... }
        // The !isGrouping is to confirm that we haven't yet matched the current character.
        if (!isGrouping && actualDecimalString.isBogus()) {
            if (decimalUniSet->contains(cp)) {
                isDecimal = true;
                actualDecimalString = UnicodeString(cp);
            }
        }

        // 4) Attempt to match a grouping separator from the equivalence set.
        // if (we have not seen a grouping or decimal separator yet) { ... }
        if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus()) {
            if (groupingUniSet->contains(cp)) {
                isGrouping = true;
                actualGroupingString = UnicodeString(cp);
            }
        }

        // Leave if we failed to match this as a separator.
        if (!isDecimal && !isGrouping) {
            break;
        }

        // Check for conditions when we don't want to accept the separator.
        if (isDecimal && integerOnly) {
            break;
        } else if (currGroupSepType == 2 && isGrouping) {
            // Fraction grouping
            break;
        }

        // Validate intermediate grouping sizes.
        bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
        bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
        if (!prevValidSecondary || (isDecimal && !currValidPrimary)) {
            // Invalid grouping sizes.
            if (isGrouping && currGroupCount == 0) {
                // Trailing grouping separators: these are taken care of below
                U_ASSERT(currGroupSepType == 1);
            } else if (requireGroupingMatch) {
                // Strict mode: reject the parse
                digitsConsumed.clear();
                digitsConsumed.bogus = true;
            }
            break;
        } else if (requireGroupingMatch && currGroupCount == 0 && currGroupSepType == 1) {
            break;
        } else {
            // Grouping sizes OK so far.
            prevGroupOffset = currGroupOffset;
            prevGroupCount = currGroupCount;
            if (isDecimal) {
                // Do not validate this group any more.
                prevGroupSepType = -1;
            } else {
                prevGroupSepType = currGroupSepType;
            }
        }

        // OK to accept the separator.
        // Special case: don't update currGroup if it is empty; this allows two grouping
        // separators in a row in lenient mode.
        if (currGroupCount != 0) {
            currGroupOffset = segment.getOffset();
        }
        currGroupSepType = isGrouping ? 1 : 2;
        currGroupCount = 0;
        if (isGrouping) {
            segment.adjustOffset(actualGroupingString.length());
        } else {
            segment.adjustOffset(actualDecimalString.length());
        }
    }

    // End of main loop.
    // Back up if there was a trailing grouping separator.
    // Shift prev -> curr so we can check it as a final group.
    if (currGroupSepType != 2 && currGroupCount == 0) {
        maybeMore = true;
        segment.setOffset(currGroupOffset);
        currGroupOffset = prevGroupOffset;
        currGroupSepType = prevGroupSepType;
        currGroupCount = prevGroupCount;
        prevGroupOffset = -1;
        prevGroupSepType = 0;
        prevGroupCount = 1;
    }

    // Validate final grouping sizes.
    bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
    bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
    if (!requireGroupingMatch) {
        // The cases we need to handle here are lone digits.
        // Examples: "1,1"  "1,1,"  "1,1,1"  "1,1,1,"  ",1" (all parse as 1)
        // See more examples in numberformattestspecification.txt
        int32_t digitsToRemove = 0;
        if (!prevValidSecondary) {
            segment.setOffset(prevGroupOffset);
            digitsToRemove += prevGroupCount;
            digitsToRemove += currGroupCount;
        } else if (!currValidPrimary && (prevGroupSepType != 0 || prevGroupCount != 0)) {
            maybeMore = true;
            segment.setOffset(currGroupOffset);
            digitsToRemove += currGroupCount;
        }
        if (digitsToRemove != 0) {
            digitsConsumed.adjustMagnitude(-digitsToRemove);
            digitsConsumed.truncate();
        }
        prevValidSecondary = true;
        currValidPrimary = true;
    }
    if (currGroupSepType != 2 && (!prevValidSecondary || !currValidPrimary)) {
        // Grouping failure.
        digitsConsumed.bogus = true;
    }

    // Strings that start with a separator but have no digits,
    // or strings that failed a grouping size check.
    if (digitsConsumed.bogus) {
        maybeMore = maybeMore || (segment.length() == 0);
        segment.setOffset(initialOffset);
        return maybeMore;
    }

    // We passed all inspections. Start post-processing.

    // Adjust for fraction part.
    digitsConsumed.adjustMagnitude(-digitsAfterDecimalPlace);

    // Set the digits, either normal or exponent.
    if (exponentSign != 0 && segment.getOffset() != initialOffset) {
        bool overflow = false;
        if (digitsConsumed.fitsInLong()) {
            int64_t exponentLong = digitsConsumed.toLong(false);
            U_ASSERT(exponentLong >= 0);
            if (exponentLong <= INT32_MAX) {
                auto exponentInt = static_cast<int32_t>(exponentLong);
                if (result.quantity.adjustMagnitude(exponentSign * exponentInt)) {
                    overflow = true;
                }
            } else {
                overflow = true;
            }
        } else {
            overflow = true;
        }
        if (overflow) {
            if (exponentSign == -1) {
                // Set to zero
                result.quantity.clear();
            } else {
                // Set to infinity
                result.quantity.bogus = true;
                result.flags |= FLAG_INFINITY;
            }
        }
    } else {
        result.quantity = digitsConsumed;
    }

    // Set other information into the result and return.
    if (!actualDecimalString.isBogus()) {
        result.flags |= FLAG_HAS_DECIMAL_SEPARATOR;
    }
    result.setCharsConsumed(segment);
    return segment.length() == 0 || maybeMore;
}