/**
 * Parses a date/time string into the fields of a caller-supplied calendar.
 *
 * @param format      The date format to parse with.
 * @param calendar    The calendar that receives the parsed fields.
 * @param text        The text to parse.
 * @param textLength  Length of text, or -1 if it is NUL-terminated.
 * @param parsePos    Optional in/out position. On entry, the index at which to
 *                    start parsing; on exit, the index after the parsed text or,
 *                    on failure, the index at which the parse failed. May be NULL,
 *                    in which case parsing starts at index 0.
 * @param status      In/out error code. Set to U_PARSE_ERROR when the parse fails.
 */
U_CAPI void U_EXPORT2
udat_parseCalendar(const UDateFormat* format,
                   UCalendar*         calendar,
                   const UChar*       text,
                   int32_t            textLength,
                   int32_t*           parsePos,
                   UErrorCode*        status)
{
    if (U_FAILURE(*status)) {
        return;
    }

    const UnicodeString src((UBool)(textLength == -1), text, textLength);
    ParsePosition pp;

    // A NULL parsePos must not hide a parse failure from the caller: route the
    // position through a local so the error check below always runs.
    int32_t stackParsePos = 0;
    if (parsePos == NULL) {
        parsePos = &stackParsePos;
    }
    pp.setIndex(*parsePos);

    ((DateFormat*)format)->parse(src, *(Calendar*)calendar, pp);

    if (pp.getErrorIndex() == -1) {
        *parsePos = pp.getIndex();
    } else {
        *parsePos = pp.getErrorIndex();
        *status = U_PARSE_ERROR;
    }
}
/**
 * Parses text starting at pos and returns the result as a UDate.
 *
 * The formatter's own calendar is used as scratch space; its time zone is
 * saved before parsing and put back afterwards.
 *
 * @param text  The text to parse.
 * @param pos   In/out parse position. Left at the start index, with the error
 *              index set to the same value, when the parsed fields cannot be
 *              turned into a time.
 * @return      The parsed time, or 0 (the epoch) on failure.
 */
UDate
DateFormat::parse(const UnicodeString& text,
                  ParsePosition& pos) const
{
    // No calendar to parse into: report the error value, 0 (the epoch).
    if (fCalendar == NULL) {
        return 0;
    }

    const int32_t startIndex = pos.getIndex();
    UDate parsed = 0;

    // The field-level parse may change the calendar's time zone, so keep a
    // copy of the current one to hand back at the end.
    TimeZone *savedZone = (TimeZone*)fCalendar->getTimeZone().clone();

    fCalendar->clear();
    parse(text, *fCalendar, pos);

    if (pos.getIndex() != startIndex) {
        UErrorCode ec = U_ZERO_ERROR;
        const UDate millis = fCalendar->getTime(ec);
        if (U_SUCCESS(ec)) {
            parsed = millis;
        } else {
            // Reached when fCalendar is non-lenient and some field is out of
            // range. The offending field is unknown, so the error is reported
            // at the start of the text.
            pos.setIndex(startIndex);
            pos.setErrorIndex(startIndex);
        }
    }

    // Put the original time zone back.
    fCalendar->adoptTimeZone(savedZone);
    return parsed;
}
double ChoiceFormat::parseArgument( const MessagePattern &pattern, int32_t partIndex, const UnicodeString &source, ParsePosition &pos) { // find the best number (defined as the one with the longest parse) int32_t start = pos.getIndex(); int32_t furthest = start; double bestNumber = uprv_getNaN(); double tempNumber = 0.0; int32_t count = pattern.countParts(); while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) { tempNumber = pattern.getNumericValue(pattern.getPart(partIndex)); partIndex += 2; // skip the numeric part and ignore the ARG_SELECTOR int32_t msgLimit = pattern.getLimitPartIndex(partIndex); int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start); if (len >= 0) { int32_t newIndex = start + len; if (newIndex > furthest) { furthest = newIndex; bestNumber = tempNumber; if (furthest == source.length()) { break; } } } partIndex = msgLimit + 1; } if (furthest == start) { pos.setErrorIndex(start); } else { pos.setIndex(furthest); } return bestNumber; }
/**
 * Shared helper: parses text with the given number format into res.
 *
 * @param res         Receives the parsed value.
 * @param fmt         The number format to parse with.
 * @param text        The text to parse.
 * @param textLength  Length of text, or -1 if it is NUL-terminated.
 * @param parsePos    Optional in/out position (NULL means start at 0). On exit
 *                    holds the index after the parsed text, or the error index
 *                    on failure.
 * @param status      In/out error code; set to U_PARSE_ERROR on parse failure.
 */
static void
parseRes(Formattable& res,
         const UNumberFormat* fmt,
         const UChar* text,
         int32_t textLength,
         int32_t *parsePos /* 0 = start */,
         UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return;
    }

    const UnicodeString src((UBool)(textLength == -1), text, textLength);

    ParsePosition pp;
    if (parsePos != 0) {
        pp.setIndex(*parsePos);
    }

    ((const NumberFormat*)fmt)->parse(src, res, pp);

    const int32_t errorAt = pp.getErrorIndex();
    const UBool failed = (errorAt != -1);

    if (failed) {
        *status = U_PARSE_ERROR;
    }
    if (parsePos != 0) {
        *parsePos = failed ? errorAt : pp.getIndex();
    }
}
/**
 * Parses text starting at pos and returns the result as a UDate.
 *
 * Parsing is done into a clone of the formatter's calendar, so fCalendar
 * itself is not modified by this call.
 *
 * @param text  The text to parse.
 * @param pos   In/out parse position. Left at the start index, with the error
 *              index set to the same value, when the parsed fields cannot be
 *              turned into a time.
 * @return      The parsed time, or 0 (the epoch) on failure.
 */
UDate
DateFormat::parse(const UnicodeString& text,
                  ParsePosition& pos) const
{
    // 0 (the epoch) doubles as the error return value.
    if (fCalendar == NULL) {
        return 0;
    }

    Calendar* scratch = fCalendar->clone();
    if (scratch == NULL) {
        return 0;
    }

    const int32_t startIndex = pos.getIndex();
    UDate parsed = 0;

    scratch->clear();
    parse(text, *scratch, pos);

    if (pos.getIndex() != startIndex) {
        UErrorCode ec = U_ZERO_ERROR;
        parsed = scratch->getTime(ec);
        if (U_FAILURE(ec)) {
            // Reached when the cloned calendar is non-lenient and some field
            // is out of range. The offending field is unknown, so the error
            // is reported at the start of the text.
            pos.setIndex(startIndex);
            pos.setErrorIndex(startIndex);
            parsed = 0;
        }
    }

    delete scratch;
    return parsed;
}
/**
 * Replaces the contents of set with the set described by pattern.
 *
 * @param set            The set to modify; must not be NULL.
 * @param pattern        The pattern text.
 * @param patternLength  Length of pattern, passed through to the UnicodeString
 *                       constructor together with pattern.
 * @param options        Option bits forwarded to UnicodeSet::applyPattern.
 * @param status         In/out error code; U_ILLEGAL_ARGUMENT_ERROR if set is NULL.
 * @return               The parse position index after applying the pattern,
 *                       or 0 if the arguments were rejected.
 */
U_CAPI int32_t U_EXPORT2
uset_applyPattern(USet *set,
                  const UChar *pattern, int32_t patternLength,
                  uint32_t options,
                  UErrorCode *status)
{
    // status is dereferenced below, so it has to be validated first.
    if (status == NULL) {
        return 0;
    }
    if (U_FAILURE(*status)) {
        return 0;
    }

    // Only the set parameter is checked here; a NULL or NUL-terminated pattern
    // is left to the UnicodeString constructor.
    if (set == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UnicodeString pat(pattern, patternLength);
    ParsePosition pos;

    UnicodeSet *target = (UnicodeSet*) set;
    target->applyPattern(pat, pos, options, NULL, *status);

    return pos.getIndex();
}
bool NumberValcon::parse(const QString& text) { if (text.isEmpty()) { _value = 0; return true; } UnicodeString utext = convertToICU(text); // Parse the number using ICU UErrorCode status = U_ZERO_ERROR; NumberFormat* fmt = NumberFormat::createInstance(status); if (U_SUCCESS(status)) { Formattable value; ParsePosition pos; fmt->parse(utext, value, pos); if (pos.getErrorIndex() == -1 && pos.getIndex() == utext.length()) { #if U_ICU_VERSION_MAJOR_NUM < 3 _value = value.getDouble(&status); #else _value = value.getDouble(status); #endif return true; } } return false; }
/**
 * Parses a date/time string and returns it as a UDate.
 *
 * @param format      The date format to parse with.
 * @param text        The text to parse.
 * @param textLength  Length of text, or -1 if it is NUL-terminated.
 * @param parsePos    Optional in/out position; NULL means start at index 0.
 *                    On exit holds the index after the parsed text, or the
 *                    error index on failure.
 * @param status      In/out error code; set to U_PARSE_ERROR on parse failure.
 * @return            The value returned by DateFormat::parse, or 0 if status
 *                    already indicated failure on entry.
 */
U_CAPI UDate U_EXPORT2
udat_parse(const UDateFormat* format,
           const UChar*       text,
           int32_t            textLength,
           int32_t*           parsePos,
           UErrorCode*        status)
{
    if (U_FAILURE(*status)) {
        return (UDate)0;
    }

    const UnicodeString src((UBool)(textLength == -1), text, textLength);

    // When the caller passes no position, track it in a local instead so the
    // rest of the function can treat both cases the same way.
    int32_t stackParsePos = 0;
    if (parsePos == NULL) {
        parsePos = &stackParsePos;
    }

    ParsePosition pp;
    pp.setIndex(*parsePos);

    const UDate parsed = ((DateFormat*)format)->parse(src, pp);

    const int32_t errorAt = pp.getErrorIndex();
    if (errorAt != -1) {
        *parsePos = errorAt;
        *status = U_PARSE_ERROR;
    } else {
        *parsePos = pp.getIndex();
    }

    return parsed;
}
static void parseRes(Formattable& res, const UNumberFormat* fmt, const UChar* text, int32_t textLength, int32_t *parsePos /* 0 = start */, UBool parseCurrency, UErrorCode *status) { if(U_FAILURE(*status)) return; int32_t len = (textLength == -1 ? u_strlen(text) : textLength); const UnicodeString src((UChar*)text, len, len); ParsePosition pp; if(parsePos != 0) pp.setIndex(*parsePos); if (parseCurrency) { ((const NumberFormat*)fmt)->parseCurrency(src, res, pp); } else { ((const NumberFormat*)fmt)->parse(src, res, pp); } if(pp.getErrorIndex() != -1) { *status = U_PARSE_ERROR; if(parsePos != 0) { *parsePos = pp.getErrorIndex(); } } else if(parsePos != 0) { *parsePos = pp.getIndex(); } }
void PluralFormat::parseObject(const UnicodeString& /*source*/, Formattable& /*result*/, ParsePosition& pos) const { // Parsing not supported. pos.setErrorIndex(pos.getIndex()); }
void SelectFormat::parseObject(const UnicodeString& /*source*/, Formattable& /*result*/, ParsePosition& pos) const { // TODO: not yet supported in icu4j and icu4c pos.setErrorIndex(pos.getIndex()); }
void TimeUnitTest::test10219Plurals() { Locale usLocale("en_US"); double values[2] = {1.588, 1.011}; UnicodeString expected[2][3] = { {"1 minute", "1.5 minutes", "1.58 minutes"}, {"1 minute", "1.0 minutes", "1.01 minutes"} }; UErrorCode status = U_ZERO_ERROR; TimeUnitFormat tuf(usLocale, status); if (U_FAILURE(status)) { dataerrln("generating TimeUnitFormat Object failed: %s", u_errorName(status)); return; } LocalPointer<DecimalFormat> nf((DecimalFormat *) NumberFormat::createInstance(usLocale, status)); if (U_FAILURE(status)) { dataerrln("generating NumberFormat Object failed: %s", u_errorName(status)); return; } for (int32_t j = 0; j < UPRV_LENGTHOF(values); ++j) { for (int32_t i = 0; i < UPRV_LENGTHOF(expected[j]); ++i) { nf->setMinimumFractionDigits(i); nf->setMaximumFractionDigits(i); nf->setRoundingMode(DecimalFormat::kRoundDown); tuf.setNumberFormat(*nf, status); if (U_FAILURE(status)) { dataerrln("setting NumberFormat failed: %s", u_errorName(status)); return; } UnicodeString actual; Formattable fmt; LocalPointer<TimeUnitAmount> tamt( new TimeUnitAmount(values[j], TimeUnit::UTIMEUNIT_MINUTE, status), status); if (U_FAILURE(status)) { dataerrln("generating TimeUnitAmount Object failed: %s", u_errorName(status)); return; } fmt.adoptObject(tamt.orphan()); tuf.format(fmt, actual, status); if (U_FAILURE(status)) { dataerrln("Actual formatting failed: %s", u_errorName(status)); return; } if (expected[j][i] != actual) { errln("Expected " + expected[j][i] + ", got " + actual); } } } // test parsing Formattable result; ParsePosition pos; UnicodeString formattedString = "1 minutes"; tuf.parseObject(formattedString, result, pos); if (formattedString.length() != pos.getIndex()) { errln("Expect parsing to go all the way to the end of the string."); } }
/** * @bug 4104136 */ void DateFormatRegressionTest::Test4104136(void) { UErrorCode status = U_ZERO_ERROR; SimpleDateFormat *sdf = new SimpleDateFormat(status); if(U_FAILURE(status)) { dataerrln("Couldn't create SimpleDateFormat, error %s", u_errorName(status)); delete sdf; return; } if(failure(status, "new SimpleDateFormat")) return; UnicodeString pattern = "'time' hh:mm"; sdf->applyPattern(pattern); logln("pattern: \"" + pattern + "\""); UnicodeString strings [] = { (UnicodeString)"time 10:30", (UnicodeString) "time 10:x", (UnicodeString) "time 10x" }; ParsePosition ppos [] = { ParsePosition(10), ParsePosition(0), ParsePosition(0) }; UDate dates [] = { date(70, UCAL_JANUARY, 1, 10, 30), -1, -1 }; /*Object[] DATA = { "time 10:30", new ParsePosition(10), new Date(70, Calendar.JANUARY, 1, 10, 30), "time 10:x", new ParsePosition(0), null, "time 10x", new ParsePosition(0), null, };*/ for(int i = 0; i < 3; i++) { UnicodeString text = strings[i]; ParsePosition finish = ppos[i]; UDate exp = dates[i]; ParsePosition pos(0); UDate d = sdf->parse(text, pos); logln(" text: \"" + text + "\""); logln(" index: %d", pos.getIndex()); logln((UnicodeString) " result: " + d); if(pos.getIndex() != finish.getIndex()) errln("Fail: Expected pos " + finish.getIndex()); if (! ((d == 0 && exp == -1) || (d == exp))) errln((UnicodeString) "Fail: Expected result " + exp); } delete sdf; }
/**
 * Parses text for a modulus substitution.
 *
 * When ruleToUse is NULL the inherited NFSubstitution::doParse() handles the
 * work. Otherwise (a >>> substitution) only ruleToUse is matched against the
 * text, and the result is combined with baseValue here.
 *
 * @param text           The string to parse.
 * @param parsePosition  Updated on exit to point at the first unmatched
 *                       character.
 * @param baseValue      The partial parse result prior to calling this routine.
 * @param upperBound     Forwarded to the rule / inherited parse.
 * @param lenientParse   Forwarded to the inherited parse; the >>> path always
 *                       passes FALSE to the rule.
 * @param result         Receives the parse result.
 * @return               Whatever the inherited parse returns when ruleToUse is
 *                       NULL; TRUE otherwise.
 */
UBool
ModulusSubstitution::doParse(const UnicodeString& text,
                             ParsePosition& parsePosition,
                             double baseValue,
                             double upperBound,
                             UBool lenientParse,
                             Formattable& result) const
{
    // Not a >>> substitution: the inherited routine does everything.
    if (ruleToUse == NULL) {
        return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, result);
    }

    // A >>> substitution: match against the one specific rule, then do the
    // part of the work that NFRuleSet.parse() would otherwise have done.
    ruleToUse->doParse(text, parsePosition, FALSE, upperBound, result);

    if (parsePosition.getIndex() != 0) {
        UErrorCode status = U_ZERO_ERROR;
        const double matched = result.getDouble(status);
        result.setDouble(composeRuleValue(matched, baseValue));
    }

    return TRUE;
}
void ChoiceFormat::parse(const UnicodeString& text, Formattable& result, ParsePosition& status) const { // find the best number (defined as the one with the longest parse) int32_t start = status.getIndex(); int32_t furthest = start; double bestNumber = uprv_getNaN(); double tempNumber = 0.0; for (int i = 0; i < fCount; ++i) { int32_t len = fChoiceFormats[i].length(); if (text.compare(start, len, fChoiceFormats[i]) == 0) { status.setIndex(start + len); tempNumber = fChoiceLimits[i]; if (status.getIndex() > furthest) { furthest = status.getIndex(); bestNumber = tempNumber; if (furthest == text.length()) break; } } } status.setIndex(furthest); if (status.getIndex() == start) { status.setErrorIndex(furthest); } result.setDouble(bestNumber); }
void RuleBasedNumberFormat::parse(const UnicodeString& text, Formattable& result, ParsePosition& parsePosition) const { //TODO: We need a real fix. See #6895 / #6896 if (noParse) { // skip parsing parsePosition.setErrorIndex(0); return; } if (!ruleSets) { parsePosition.setErrorIndex(0); return; } UnicodeString workingText(text, parsePosition.getIndex()); ParsePosition workingPos(0); ParsePosition high_pp(0); Formattable high_result; for (NFRuleSet** p = ruleSets; *p; ++p) { NFRuleSet *rp = *p; if (rp->isPublic() && rp->isParseable()) { ParsePosition working_pp(0); Formattable working_result; rp->parse(workingText, working_pp, kMaxDouble, working_result); if (working_pp.getIndex() > high_pp.getIndex()) { high_pp = working_pp; high_result = working_result; if (high_pp.getIndex() == workingText.length()) { break; } } } } int32_t startIndex = parsePosition.getIndex(); parsePosition.setIndex(startIndex + high_pp.getIndex()); if (high_pp.getIndex() > 0) { parsePosition.setErrorIndex(-1); } else { int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0; parsePosition.setErrorIndex(startIndex + errorIndex); } result = high_result; if (result.getType() == Formattable::kDouble) { int32_t r = (int32_t)result.getDouble(); if ((double)r == result.getDouble()) { result.setLong(r); } } }
// // RBBISymbolTable::parseReference This function from the abstract symbol table interface // looks for a $variable name in the source text. // It does not look it up, only scans for it. // It is used by the UnicodeSet parser. // // This implementation is lifted pretty much verbatim // from the rules based transliterator implementation. // I didn't see an obvious way of sharing it. // UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text, ParsePosition& pos, int32_t limit) const { int32_t start = pos.getIndex(); int32_t i = start; UnicodeString result; while (i < limit) { UChar c = text.charAt(i); if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) { break; } ++i; } if (i == start) { // No valid name chars return result; // Indicate failure with empty string } pos.setIndex(i); text.extractBetween(start, i, result); return result; }
/**
 * Parses a currency amount from text and returns its numeric value.
 *
 * @param fmt         The number format to parse with.
 * @param text        The text to parse.
 * @param textLength  Length of text, or -1 if it is NUL-terminated.
 * @param parsePos    Optional in/out position (NULL means start at 0). On exit
 *                    holds the index after the parsed text, or the error index
 *                    on failure.
 * @param currency    Output buffer for the ISO currency code of the parsed
 *                    amount. Always set to the empty string first; must not be
 *                    NULL.
 * @param status      In/out error code. Set to U_PARSE_ERROR unless an amount
 *                    was successfully parsed.
 * @return            The parsed number, or 0.0 on failure.
 */
U_CAPI double U_EXPORT2
unum_parseDoubleCurrency(const UNumberFormat* fmt,
                         const UChar* text,
                         int32_t textLength,
                         int32_t* parsePos, /* 0 = start */
                         UChar* currency,
                         UErrorCode* status)
{
    double doubleVal = 0.0;
    currency[0] = 0;
    if (U_FAILURE(*status)) {
        return doubleVal;
    }

    const UnicodeString src((UBool)(textLength == -1), text, textLength);
    ParsePosition pp;
    if (parsePos != NULL) {
        pp.setIndex(*parsePos);
    }

    *status = U_PARSE_ERROR; // assume failure, reset if succeed
    LocalPointer<CurrencyAmount> currAmt(((const NumberFormat*)fmt)->parseCurrency(src, pp));

    if (pp.getErrorIndex() != -1) {
        if (parsePos != NULL) {
            *parsePos = pp.getErrorIndex();
        }
    } else {
        if (parsePos != NULL) {
            *parsePos = pp.getIndex();
        }
        // Only touch the amount if one was actually produced: a parse that
        // started at a non-zero index can leave the index > 0 without an
        // error index and still return no object.
        if (pp.getIndex() > 0 && currAmt.isValid()) {
            *status = U_ZERO_ERROR;
            u_strcpy(currency, currAmt->getISOCurrency());
            doubleVal = currAmt->getNumber().getDouble(*status);
        }
    }

    return doubleVal;
}
/**
 * Parses text starting at pos and returns the result as a UDate.
 *
 * The formatter's own calendar is cleared and used to receive the parsed
 * fields.
 *
 * @param text  The text to parse.
 * @param pos   In/out parse position. Reset to the start index, with the error
 *              index set to the same value, when the parsed fields cannot be
 *              turned into a time.
 * @return      The parsed time, or 0 (the epoch) on failure.
 */
UDate
DateFormat::parse(const UnicodeString& text,
                  ParsePosition& pos) const
{
    const UDate kErrorResult = 0; // Error return UDate is 0 (the epoch)

    if (fCalendar == NULL) {
        return kErrorResult;
    }

    const int32_t startIndex = pos.getIndex();
    fCalendar->clear();
    parse(text, *fCalendar, pos);

    if (pos.getIndex() == startIndex) {
        // Nothing was consumed.
        return kErrorResult;
    }

    UErrorCode ec = U_ZERO_ERROR;
    const UDate parsed = fCalendar->getTime(ec);
    if (U_FAILURE(ec)) {
        // Reached when fCalendar is non-lenient and some field is out of
        // range. The offending field is unknown, so the error is reported at
        // the start of the text.
        pos.setIndex(startIndex);
        pos.setErrorIndex(startIndex);
        return kErrorResult;
    }

    return parsed; // Successful function exit
}
void RelativeDateFormat::parse( const UnicodeString& text, Calendar& cal, ParsePosition& pos) const { // Can the fDateFormat parse it? if(fDateFormat != NULL) { ParsePosition aPos(pos); fDateFormat->parse(text,cal,aPos); if((aPos.getIndex() != pos.getIndex()) && (aPos.getErrorIndex()==-1)) { pos=aPos; // copy the sub parse return; // parsed subfmt OK } } // Linear search the relative strings for(int n=0; n<fDatesLen; n++) { if(fDates[n].string != NULL && (0==text.compare(pos.getIndex(), fDates[n].len, fDates[n].string))) { UErrorCode status = U_ZERO_ERROR; // Set the calendar to now+offset cal.setTime(Calendar::getNow(),status); cal.add(UCAL_DATE,fDates[n].offset, status); if(U_FAILURE(status)) { // failure in setting calendar fields pos.setErrorIndex(pos.getIndex()+fDates[n].len); } else { pos.setIndex(pos.getIndex()+fDates[n].len); } return; } } // parse failed }
void DecimalFormatTest::execParseTest(int32_t lineNum, const UnicodeString &inputText, const UnicodeString &expectedType, const UnicodeString &expectedDecimal, UErrorCode &status) { if (U_FAILURE(status)) { return; } DecimalFormatSymbols symbols(Locale::getUS(), status); UnicodeString pattern = UNICODE_STRING_SIMPLE("####"); DecimalFormat format(pattern, symbols, status); Formattable result; if (U_FAILURE(status)) { errln("file dcfmtest.txt, line %d: %s error creating the formatter.", lineNum, u_errorName(status)); return; } ParsePosition pos; int32_t expectedParseEndPosition = inputText.length(); format.parse(inputText, result, pos); if (expectedParseEndPosition != pos.getIndex()) { errln("file dcfmtest.txt, line %d: Expected parse position afeter parsing: %d. " "Actual parse position: %d", expectedParseEndPosition, pos.getIndex()); return; } char expectedTypeC[2]; expectedType.extract(0, 1, expectedTypeC, 2, US_INV); Formattable::Type expectType = Formattable::kDate; switch (expectedTypeC[0]) { case 'd': expectType = Formattable::kDouble; break; case 'i': expectType = Formattable::kLong; break; case 'l': expectType = Formattable::kInt64; break; default: errln("file dcfmtest.tx, line %d: unrecongized expected type \"%s\"", lineNum, InvariantStringPiece(expectedType).data()); return; } if (result.getType() != expectType) { errln("file dcfmtest.txt, line %d: expectedParseType(%s) != actual parseType(%s)", lineNum, formattableType(expectType), formattableType(result.getType())); return; } StringPiece decimalResult = result.getDecimalNumber(status); if (U_FAILURE(status)) { errln("File %s, line %d: error %s. Line in file dcfmtest.txt: %d:", __FILE__, __LINE__, u_errorName(status), lineNum); return; } InvariantStringPiece expectedResults(expectedDecimal); if (decimalResult != expectedResults) { errln("file dcfmtest.txt, line %d: expected \"%s\", got \"%s\"", lineNum, expectedResults.data(), decimalResult.data()); } return; }
//--------------------------------------------------------------------------------- // // scanSet Construct a UnicodeSet from the text at the current scan // position. Advance the scan position to the first character // after the set. // // A new RBBI setref node referring to the set is pushed onto the node // stack. // // The scan position is normally under the control of the state machine // that controls rule parsing. UnicodeSets, however, are parsed by // the UnicodeSet constructor, not by the RBBI rule parser. // //--------------------------------------------------------------------------------- void RBBIRuleScanner::scanSet() { UnicodeSet *uset; ParsePosition pos; int startPos; int i; if (U_FAILURE(*fRB->fStatus)) { return; } pos.setIndex(fScanIndex); startPos = fScanIndex; UErrorCode localStatus = U_ZERO_ERROR; uset = new UnicodeSet(fRB->fRules, pos, USET_IGNORE_SPACE, fSymbolTable, localStatus); if (U_FAILURE(localStatus)) { // TODO: Get more accurate position of the error from UnicodeSet's return info. // UnicodeSet appears to not be reporting correctly at this time. #ifdef RBBI_DEBUG RBBIDebugPrintf("UnicodeSet parse postion.ErrorIndex = %d\n", pos.getIndex()); #endif error(localStatus); delete uset; return; } // Verify that the set contains at least one code point. // if (uset->isEmpty()) { // This set is empty. // Make it an error, because it almost certainly is not what the user wanted. // Also, avoids having to think about corner cases in the tree manipulation code // that occurs later on. error(U_BRK_RULE_EMPTY_SET); delete uset; return; } // Advance the RBBI parse postion over the UnicodeSet pattern. // Don't just set fScanIndex because the line/char positions maintained // for error reporting would be thrown off. 
i = pos.getIndex(); for (;;) { if (fNextIndex >= i) { break; } nextCharLL(); } if (U_SUCCESS(*fRB->fStatus)) { RBBINode *n; n = pushNewNode(RBBINode::setRef); n->fFirstPos = startPos; n->fLastPos = fNextIndex; fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); // findSetFor() serves several purposes here: // - Adopts storage for the UnicodeSet, will be responsible for deleting. // - Mantains collection of all sets in use, needed later for establishing // character categories for run time engine. // - Eliminates mulitiple instances of the same set. // - Creates a new uset node if necessary (if this isn't a duplicate.) findSetFor(n->fText, n, uset); } }
/**
 * Tries every rule in this rule set against text and keeps the result of the
 * rule that matches the most characters.
 *
 * @param text        The text to parse.
 * @param pos         In/out position. Each rule is tried starting from the
 *                    position passed in; on exit it is replaced by the
 *                    high-water mark reached by the best rule.
 * @param upperBound  Regular rules whose base value is not below this bound
 *                    are skipped (except in a fraction rule set).
 * @param result      Receives the value of the best match; 0 (long) if
 *                    nothing matched.
 * @return            0 if text is empty, 1 otherwise.
 */
UBool
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
{
    // try matching each rule in the rule set against the text being
    // parsed.  Whichever one matches the most characters is the one
    // that determines the value we return.

    result.setLong(0);

    // dump out if there's no text to parse
    if (text.length() == 0) {
        return 0;
    }

    ParsePosition highWaterMark;
    ParsePosition workingPos = pos;

#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> %p '", (const void*)this);
    dumpUS(stderr, name);
    fprintf(stderr, "' text '");
    dumpUS(stderr, text);
    fprintf(stderr, "'\n");
    fprintf(stderr, " parse negative: %d\n", negativeNumberRule != 0);
#endif

    // start by trying the negative number rule (if there is one)
    if (negativeNumberRule) {
        Formattable tempResult;
#ifdef RBNF_DEBUG
        fprintf(stderr, " <nfrs before negative> %p ub: %g\n", (const void*)negativeNumberRule, upperBound);
#endif
        UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
#ifdef RBNF_DEBUG
        fprintf(stderr, " <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
#endif
        if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
            result = tempResult;
            highWaterMark = workingPos;
        }
        workingPos = pos;
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> continue fractional with text '");
    dumpUS(stderr, text);
    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
#endif

    // then try each of the fraction rules
    {
        for (int i = 0; i < 3; i++) {
            if (fractionRules[i]) {
                Formattable tempResult;
                UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
                if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
                    result = tempResult;
                    highWaterMark = workingPos;
                }
                workingPos = pos;
            }
        }
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> continue other with text '");
    dumpUS(stderr, text);
    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
#endif

    // finally, go through the regular rules one at a time.  We start
    // at the end of the list because we want to try matching the most
    // significant rule first (this helps ensure that we parse
    // "five thousand three hundred six" as
    // "(five thousand) (three hundred) (six)" rather than
    // "((five thousand three) hundred) (six)").  Skip rules whose
    // base values are higher than the upper bound (again, this helps
    // limit ambiguity by making sure the rules that match a rule's
    // substitutions are less significant than the rule containing them).
    {
        int64_t ub = util64_fromDouble(upperBound);
#ifdef RBNF_DEBUG
        {
            char ubstr[64];
            util64_toa(ub, ubstr, 64);
            char ubstrhex[64];
            util64_toa(ub, ubstrhex, 64, 16);
            fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
        }
#endif
        // Stop early once a rule has consumed the entire text.
        for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
            if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
                continue;
            }
            Formattable tempResult;
            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
            if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
                result = tempResult;
                highWaterMark = workingPos;
            }
            workingPos = pos;
        }
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> exit\n");
#endif
    // finally, update the parse position we were passed to point to the
    // first character we didn't use, and return the result that
    // corresponds to that string of characters
    pos = highWaterMark;

    return 1;
}
/**
 * Parses the fractional part of a number.
 *
 * Outside byDigits mode the inherited NFSubstitution::doParse() is used (with
 * an upper bound of 0). In byDigits mode the text is consumed one digit at a
 * time through the owning rule set (upper bound 10) until something fails to
 * match; the collected digits form the fraction.
 *
 * @param text           The text to parse.
 * @param parsePosition  Advanced by the number of characters consumed,
 *                       including single spaces skipped between digits.
 * @param baseValue      Partial result to combine with the parsed fraction.
 * @param lenientParse   If true, a default NumberFormat is tried whenever the
 *                       rule set fails to match a digit.
 * @param resVal         Receives the combined result as a double.
 * @return               The inherited result outside byDigits mode; TRUE in
 *                       byDigits mode.
 */
UBool
FractionalPartSubstitution::doParse(const UnicodeString& text,
                ParsePosition& parsePosition,
                double baseValue,
                double /*upperBound*/,
                UBool lenientParse,
                Formattable& resVal) const
{
    // Not in byDigits mode: the inherited doParse() is sufficient.
    if (!byDigits) {
        return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, resVal);
    }

    // byDigits mode: peel digits off the front of a working copy of the text.
    UnicodeString remaining(text);
    ParsePosition digitPos(1);   // non-zero so the loop is entered
    int32_t digit;
    DigitList digits;
    NumberFormat* lenientFmt = NULL;   // created lazily, only if needed

    while (remaining.length() > 0 && digitPos.getIndex() != 0) {
        digitPos.setIndex(0);
        Formattable temp;

        getRuleSet()->parse(remaining, digitPos, 10, temp);
        UErrorCode status = U_ZERO_ERROR;
        digit = temp.getLong(status);

        if (lenientParse && digitPos.getIndex() == 0) {
            // The rule set did not match; fall back to a plain number format.
            if (!lenientFmt) {
                status = U_ZERO_ERROR;
                lenientFmt = NumberFormat::createInstance(status);
                if (U_FAILURE(status)) {
                    delete lenientFmt;
                    lenientFmt = NULL;
                }
            }
            if (lenientFmt) {
                lenientFmt->parse(remaining, temp, digitPos);
                digit = temp.getLong(status);
            }
        }

        if (digitPos.getIndex() != 0) {
            digits.append((char)('0' + digit));
            parsePosition.setIndex(parsePosition.getIndex() + digitPos.getIndex());
            remaining.removeBetween(0, digitPos.getIndex());
            // Skip spaces separating this digit from the next one.
            while (remaining.length() > 0 && remaining.charAt(0) == gSpace) {
                remaining.removeBetween(0, 1);
                parsePosition.setIndex(parsePosition.getIndex() + 1);
            }
        }
    }
    delete lenientFmt;

    double fraction = digits.fCount == 0 ? 0 : digits.getDouble();
    fraction = composeRuleValue(fraction, baseValue);
    resVal.setDouble(fraction);
    return TRUE;
}
void RelativeDateFormat::parse( const UnicodeString& text, Calendar& cal, ParsePosition& pos) const { int32_t startIndex = pos.getIndex(); if (fDatePattern.isEmpty()) { // no date pattern, try parsing as time fDateTimeFormatter->applyPattern(fTimePattern); fDateTimeFormatter->parse(text,cal,pos); } else if (fTimePattern.isEmpty() || fCombinedFormat == NULL) { // no time pattern or way to combine, try parsing as date // first check whether text matches a relativeDayString UBool matchedRelative = FALSE; for (int n=0; n < fDatesLen && !matchedRelative; n++) { if (fDates[n].string != NULL && text.compare(startIndex, fDates[n].len, fDates[n].string) == 0) { // it matched, handle the relative day string UErrorCode status = U_ZERO_ERROR; matchedRelative = TRUE; // Set the calendar to now+offset cal.setTime(Calendar::getNow(),status); cal.add(UCAL_DATE,fDates[n].offset, status); if(U_FAILURE(status)) { // failure in setting calendar field, set offset to beginning of rel day string pos.setErrorIndex(startIndex); } else { pos.setIndex(startIndex + fDates[n].len); } } } if (!matchedRelative) { // just parse as normal date fDateTimeFormatter->applyPattern(fDatePattern); fDateTimeFormatter->parse(text,cal,pos); } } else { // Here we replace any relativeDayString in text with the equivalent date // formatted per fDatePattern, then parse text normally using the combined pattern. 
UnicodeString modifiedText(text); FieldPosition fPos; int32_t dateStart = 0, origDateLen = 0, modDateLen = 0; UErrorCode status = U_ZERO_ERROR; for (int n=0; n < fDatesLen; n++) { int32_t relativeStringOffset; if (fDates[n].string != NULL && (relativeStringOffset = modifiedText.indexOf(fDates[n].string, fDates[n].len, startIndex)) >= startIndex) { // it matched, replace the relative date with a real one for parsing UnicodeString dateString; Calendar * tempCal = cal.clone(); // Set the calendar to now+offset tempCal->setTime(Calendar::getNow(),status); tempCal->add(UCAL_DATE,fDates[n].offset, status); if(U_FAILURE(status)) { pos.setErrorIndex(startIndex); delete tempCal; return; } fDateTimeFormatter->applyPattern(fDatePattern); fDateTimeFormatter->format(*tempCal, dateString, fPos); dateStart = relativeStringOffset; origDateLen = fDates[n].len; modDateLen = dateString.length(); modifiedText.replace(dateStart, origDateLen, dateString); delete tempCal; break; } } UnicodeString combinedPattern; fCombinedFormat->format(fTimePattern, fDatePattern, combinedPattern, status); fDateTimeFormatter->applyPattern(combinedPattern); fDateTimeFormatter->parse(modifiedText,cal,pos); // Adjust offsets UBool noError = (pos.getErrorIndex() < 0); int32_t offset = (noError)? pos.getIndex(): pos.getErrorIndex(); if (offset >= dateStart + modDateLen) { // offset at or after the end of the replaced text, // correct by the difference between original and replacement offset -= (modDateLen - origDateLen); } else if (offset >= dateStart) { // offset in the replaced text, set it to the beginning of that text // (i.e. the beginning of the relative day string) offset = dateStart; } if (noError) { pos.setIndex(offset); } else { pos.setErrorIndex(offset); } } }
/**
 * Parses the numerator of a fraction.
 *
 * Lenient parsing is always turned off for the underlying parse, because
 * leaving it on seriously disturbs the algorithm. When withZeros is set,
 * leading zeros are first counted (by parsing with an upper bound of 1) and
 * then used to scale the parsed value down.
 *
 * @param text           The text to parse.
 * @param parsePosition  In/out parse position, updated by the underlying parse.
 * @param baseValue      Partial result to combine with (ignored, and replaced
 *                       by 1, when withZeros is set).
 * @param upperBound     Forwarded to the underlying parse.
 * @param result         Receives the parse result.
 * @return               Always TRUE.
 */
UBool
NumeratorSubstitution::doParse(const UnicodeString& text,
                               ParsePosition& parsePosition,
                               double baseValue,
                               double upperBound,
                               UBool /*lenientParse*/,
                               Formattable& result) const
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t leadingZeros = 0;
    UnicodeString remaining(text);

    if (withZeros) {
        // Count the zeros so the parse result can be adjusted afterwards.
        ParsePosition zeroPos(1);
        Formattable temp;

        // zeroPos is non-zero whenever this condition is evaluated (1 at
        // first, and the loop breaks out as soon as a parse leaves it at 0),
        // so only the remaining length needs testing.
        while (remaining.length() > 0) {
            zeroPos.setIndex(0);
            getRuleSet()->parse(remaining, zeroPos, 1, temp); // parse zero or nothing at all
            if (zeroPos.getIndex() == 0) {
                // we failed, either there were no more zeros, or the number was formatted with digits
                // either way, we're done
                break;
            }

            ++leadingZeros;
            parsePosition.setIndex(parsePosition.getIndex() + zeroPos.getIndex());
            remaining.remove(0, zeroPos.getIndex());
            while (remaining.length() > 0 && remaining.charAt(0) == gSpace) {
                remaining.remove(0, 1);
                parsePosition.setIndex(parsePosition.getIndex() + 1);
            }
        }

        // Restart from the original text with everything consumed so far cut off.
        remaining = text;
        remaining.remove(0, (int32_t)parsePosition.getIndex());
        parsePosition.setIndex(0);
    }

    // we've parsed off the zeros, now let's parse the rest from our current position
    const double effectiveBase = withZeros ? 1 : baseValue;
    NFSubstitution::doParse(remaining, parsePosition, effectiveBase, upperBound, FALSE, result);

    if (withZeros) {
        // any base value will do in this case.  is there a way to
        // force this to not bother trying all the base values?

        // compute the 'effective' base and prescale the value down
        int64_t numerator = result.getLong(status); // force conversion!
        int64_t denominator = 1;
        int32_t pow = 0;
        while (denominator <= numerator) {
            denominator *= 10;
            ++pow;
        }
        // now add the zeros
        for (; leadingZeros > 0; --leadingZeros) {
            denominator *= 10;
        }
        // denominator is now our true denominator
        result.setDouble((double)numerator / (double)denominator);
    }

    return TRUE;
}
/**
 * Parses text with the rule set or number format owned by this substitution
 * and, on a match, combines the parsed value with baseValue.
 *
 * @param text           The string to parse.
 * @param parsePosition  Assumed to be 0 on entry. On exit, points at the first
 *                       unmatched character, or is 0 if nothing matched.
 * @param baseValue      A partial parse result that is combined with the
 *                       result of this parse through composeRuleValue().
 * @param upperBound     Passed through calcUpperBound() and then to the rule
 *                       set; limits which rules may match by base value.
 * @param lenientParse   If true and the (non-fraction) rule set matches
 *                       nothing, the text is also tried against a
 *                       default-constructed NumberFormat.
 * @param result         On a match, receives the composed value as a double;
 *                       otherwise set to long 0.
 * @return               TRUE if something matched, FALSE otherwise.
 */
UBool
NFSubstitution::doParse(const UnicodeString& text,
                        ParsePosition& parsePosition,
                        double baseValue,
                        double upperBound,
                        UBool lenientParse,
                        Formattable& result) const
{
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfsubs> %x bv: %g ub: %g\n", this, baseValue, upperBound);
#endif
    // Work out the highest base value a rule may have and still match.
    // This depends on the kind of substitution: multiplier, modulus and
    // numerator substitutions restrict the search to rules with base values
    // lower than their own, same-value substitutions leave the bound as it
    // is, and the others allow any rule to match.
    upperBound = calcUpperBound(upperBound);

    if (ruleSet != NULL) {
        // Parse with our rule set first.
        ruleSet->parse(text, parsePosition, upperBound, result);

        // If that matched nothing and lenient parsing is requested (it can be
        // off even when the formatter itself is lenient), give a
        // default-constructed NumberFormat a chance as well.
        const UBool tryLenient = lenientParse
                              && !ruleSet->isFractionRuleSet()
                              && parsePosition.getIndex() == 0;
        if (tryLenient) {
            UErrorCode status = U_ZERO_ERROR;
            NumberFormat* fmt = NumberFormat::createInstance(status);
            if (U_SUCCESS(status)) {
                fmt->parse(text, result, parsePosition);
            }
            delete fmt;
        }
    } else if (numberFormat != NULL) {
        // ...or use our DecimalFormat to parse the text
        numberFormat->parse(text, result, parsePosition);
    }

    // An index of 0 means nothing matched.
    if (parsePosition.getIndex() == 0) {
        result.setLong(0);
        return FALSE;
    }

    // The parse succeeded and the caller's position has already been advanced
    // (this function has no position of its own).
    //
    // composeRuleValue() builds the full parse result from the partial result
    // handed in by the caller (the owning rule's base value, or that base
    // value already combined with the rule's other substitution) and the
    // value matched here. How the two are combined depends on the type of
    // substitution:
    //   - regular rule: multiplier result times the rule's divisor, plus the
    //     modulus result;
    //   - negative-number rule: the negative of the substitution's result;
    //   - fraction rule: the sum of the two substitution results;
    //   - rule in a fraction rule set: numerator result divided by the rule's
    //     base value;
    //   - same-value substitutions propagate the value upward unchanged, and
    //     null substitutions do not affect it.
    UErrorCode status = U_ZERO_ERROR;
    const double matched = result.getDouble(status);
    result.setDouble(composeRuleValue(matched, baseValue));
    return TRUE;
}
/**
 * JNI entry point: parses `text` with the native number format identified by
 * `addr`, starting at the index stored in the java.text.ParsePosition
 * `position`.
 *
 * @param env       JNI environment.
 * @param clazz     Declaring Java class (unused).
 * @param addr      Native UNumberFormat handle, as handed to Java earlier.
 * @param text      The Java string to parse.
 * @param position  java.text.ParsePosition; read for the start index, updated
 *                  with the new index on success or the error index on failure.
 * @param lenient   When true, UNUM_LENIENT_PARSE is switched on for the
 *                  duration of the parse and switched off again afterwards.
 * @return A java.lang.Double or java.lang.Long holding the parsed value, or
 *         NULL if the start index is out of range, the string characters
 *         could not be obtained, the parse failed, or the result is not a
 *         double/long/int64.
 */
static jobject parseRBNFImpl(JNIEnv *env, jclass clazz, jint addr, jstring text,
        jobject position, jboolean lenient) {

    // LOGI("ENTER parseRBNFImpl");

    const char * parsePositionClassName = "java/text/ParsePosition";
    const char * longClassName = "java/lang/Long";
    const char * doubleClassName = "java/lang/Double";

    // NOTE(review): the handle travels through a 32-bit jint, so this cast
    // only round-trips pointers that fit in 32 bits — confirm for 64-bit builds.
    UNumberFormat *fmt = (UNumberFormat *)(int)addr;

    jclass parsePositionClass = env->FindClass(parsePositionClassName);
    jclass longClass = env->FindClass(longClassName);
    jclass doubleClass = env->FindClass(doubleClassName);

    jmethodID getIndexMethodID = env->GetMethodID(parsePositionClass,
            "getIndex", "()I");
    jmethodID setIndexMethodID = env->GetMethodID(parsePositionClass,
            "setIndex", "(I)V");
    jmethodID setErrorIndexMethodID = env->GetMethodID(parsePositionClass,
            "setErrorIndex", "(I)V");

    jmethodID longInitMethodID = env->GetMethodID(longClass, "<init>", "(J)V");
    jmethodID dblInitMethodID = env->GetMethodID(doubleClass, "<init>", "(D)V");

    int parsePos = env->CallIntMethod(position, getIndexMethodID);
    int strlength = env->GetStringLength(text);

    // make sure the ParsePosition is valid. Actually icu4c would parse a
    // number correctly even if the parsePosition is set to -1, but since the
    // RI fails for that case we have to fail too.
    //
    // This check is done BEFORE the string characters are acquired so that
    // the early return cannot leak them.
    if(parsePos < 0 || parsePos > strlength) {
        return NULL;
    }

    // Acquire the characters only once we know we are going to parse.
    jchar *str = (UChar *)env->GetStringChars(text, NULL);
    if(str == NULL) {
        // GetStringChars failed; nothing was acquired, so nothing to release.
        return NULL;
    }

    Formattable res;
    ParsePosition pp;
    pp.setIndex(parsePos);

    {
        // `src` merely aliases the JNI buffer; keep it in its own scope so it
        // is gone before the buffer is handed back to the VM.
        const UnicodeString src((UChar*)str, strlength, strlength);

        if(lenient) {
            unum_setAttribute(fmt, UNUM_LENIENT_PARSE, JNI_TRUE);
        }

        ((const NumberFormat*)fmt)->parse(src, res, pp);

        if(lenient) {
            unum_setAttribute(fmt, UNUM_LENIENT_PARSE, JNI_FALSE);
        }
    }

    env->ReleaseStringChars(text, str);

    if(pp.getErrorIndex() == -1) {
        parsePos = pp.getIndex();
    } else {
        // Parse failed: report where, and leave the index untouched.
        env->CallVoidMethod(position, setErrorIndexMethodID,
                (jint) pp.getErrorIndex());
        return NULL;
    }

    Formattable::Type numType;
    numType = res.getType();

    double resultDouble;
    long resultLong;
    int64_t resultInt64;

    // Box the result in the matching Java wrapper; the ParsePosition index is
    // only advanced for result types we can actually return.
    switch(numType) {
        case Formattable::kDouble:
            resultDouble = res.getDouble();
            env->CallVoidMethod(position, setIndexMethodID, (jint) parsePos);
            return env->NewObject(doubleClass, dblInitMethodID,
                    (jdouble) resultDouble);
        case Formattable::kLong:
            resultLong = res.getLong();
            env->CallVoidMethod(position, setIndexMethodID, (jint) parsePos);
            return env->NewObject(longClass, longInitMethodID,
                    (jlong) resultLong);
        case Formattable::kInt64:
            resultInt64 = res.getInt64();
            env->CallVoidMethod(position, setIndexMethodID, (jint) parsePos);
            return env->NewObject(longClass, longInitMethodID,
                    (jlong) resultInt64);
        default:
            break;
    }

    return NULL;
}
void TimeUnitFormat::parseObject(const UnicodeString& source, Formattable& result, ParsePosition& pos) const { Formattable resultNumber(0.0); UBool withNumberFormat = false; TimeUnit::UTimeUnitFields resultTimeUnit = TimeUnit::UTIMEUNIT_FIELD_COUNT; int32_t oldPos = pos.getIndex(); int32_t newPos = -1; int32_t longestParseDistance = 0; UnicodeString* countOfLongestMatch = NULL; #ifdef TMUTFMT_DEBUG char res[1000]; source.extract(0, source.length(), res, "UTF-8"); std::cout << "parse source: " << res << "\n"; #endif // parse by iterating through all available patterns // and looking for the longest match. for (TimeUnit::UTimeUnitFields i = TimeUnit::UTIMEUNIT_YEAR; i < TimeUnit::UTIMEUNIT_FIELD_COUNT; i = (TimeUnit::UTimeUnitFields)(i+1)) { Hashtable* countToPatterns = fTimeUnitToCountToPatterns[i]; int32_t elemPos = UHASH_FIRST; const UHashElement* elem = NULL; while ((elem = countToPatterns->nextElement(elemPos)) != NULL){ const UHashTok keyTok = elem->key; UnicodeString* count = (UnicodeString*)keyTok.pointer; #ifdef TMUTFMT_DEBUG count->extract(0, count->length(), res, "UTF-8"); std::cout << "parse plural count: " << res << "\n"; #endif const UHashTok valueTok = elem->value; // the value is a pair of MessageFormat* MessageFormat** patterns = (MessageFormat**)valueTok.pointer; for (UTimeUnitFormatStyle style = UTMUTFMT_FULL_STYLE; style < UTMUTFMT_FORMAT_STYLE_COUNT; style = (UTimeUnitFormatStyle)(style + 1)) { MessageFormat* pattern = patterns[style]; pos.setErrorIndex(-1); pos.setIndex(oldPos); // see if we can parse Formattable parsed; pattern->parseObject(source, parsed, pos); if (pos.getErrorIndex() != -1 || pos.getIndex() == oldPos) { continue; } #ifdef TMUTFMT_DEBUG std::cout << "parsed.getType: " << parsed.getType() << "\n"; #endif Formattable tmpNumber(0.0); if (pattern->getArgTypeCount() != 0) { Formattable& temp = parsed[0]; if (temp.getType() == Formattable::kString) { UnicodeString tmpString; UErrorCode pStatus = U_ZERO_ERROR; 
getNumberFormat().parse(temp.getString(tmpString), tmpNumber, pStatus); if (U_FAILURE(pStatus)) { continue; } } else if (temp.isNumeric()) { tmpNumber = temp; } else { continue; } } int32_t parseDistance = pos.getIndex() - oldPos; if (parseDistance > longestParseDistance) { if (pattern->getArgTypeCount() != 0) { resultNumber = tmpNumber; withNumberFormat = true; } else { withNumberFormat = false; } resultTimeUnit = i; newPos = pos.getIndex(); longestParseDistance = parseDistance; countOfLongestMatch = count; } } } } /* After find the longest match, parse the number. * Result number could be null for the pattern without number pattern. * such as unit pattern in Arabic. * When result number is null, use plural rule to set the number. */ if (withNumberFormat == false && longestParseDistance != 0) { // set the number using plurrual count if (0 == countOfLongestMatch->compare(PLURAL_COUNT_ZERO, 4)) { resultNumber = Formattable(0.0); } else if (0 == countOfLongestMatch->compare(PLURAL_COUNT_ONE, 3)) { resultNumber = Formattable(1.0); } else if (0 == countOfLongestMatch->compare(PLURAL_COUNT_TWO, 3)) { resultNumber = Formattable(2.0); } else { // should not happen. // TODO: how to handle? resultNumber = Formattable(3.0); } } if (longestParseDistance == 0) { pos.setIndex(oldPos); pos.setErrorIndex(0); } else { UErrorCode status = U_ZERO_ERROR; TimeUnitAmount* tmutamt = new TimeUnitAmount(resultNumber, resultTimeUnit, status); if (U_SUCCESS(status)) { result.adoptObject(tmutamt); pos.setIndex(newPos); pos.setErrorIndex(-1); } else { pos.setIndex(oldPos); pos.setErrorIndex(0); } } }
void TimeUnitFormat::parseObject(const UnicodeString& source, Formattable& result, ParsePosition& pos) const { double resultNumber = -1; UBool withNumberFormat = false; TimeUnit::UTimeUnitFields resultTimeUnit = TimeUnit::UTIMEUNIT_FIELD_COUNT; int32_t oldPos = pos.getIndex(); int32_t newPos = -1; int32_t longestParseDistance = 0; UnicodeString* countOfLongestMatch = NULL; #ifdef TMUTFMT_DEBUG char res[1000]; source.extract(0, source.length(), res, "UTF-8"); std::cout << "parse source: " << res << "\n"; #endif // parse by iterating through all available patterns // and looking for the longest match. for (TimeUnit::UTimeUnitFields i = TimeUnit::UTIMEUNIT_YEAR; i < TimeUnit::UTIMEUNIT_FIELD_COUNT; i = (TimeUnit::UTimeUnitFields)(i+1)) { Hashtable* countToPatterns = fTimeUnitToCountToPatterns[i]; int32_t elemPos = -1; const UHashElement* elem = NULL; while ((elem = countToPatterns->nextElement(elemPos)) != NULL){ const UHashTok keyTok = elem->key; UnicodeString* count = (UnicodeString*)keyTok.pointer; #ifdef TMUTFMT_DEBUG count->extract(0, count->length(), res, "UTF-8"); std::cout << "parse plural count: " << res << "\n"; #endif const UHashTok valueTok = elem->value; // the value is a pair of MessageFormat* MessageFormat** patterns = (MessageFormat**)valueTok.pointer; for (EStyle style = kFull; style < kTotal; style = (EStyle)(style + 1)) { MessageFormat* pattern = patterns[style]; pos.setErrorIndex(-1); pos.setIndex(oldPos); // see if we can parse Formattable parsed; pattern->parseObject(source, parsed, pos); if (pos.getErrorIndex() != -1 || pos.getIndex() == oldPos) { continue; } #ifdef TMUTFMT_DEBUG std::cout << "parsed.getType: " << parsed.getType() << "\n"; #endif double tmpNumber = 0; if (pattern->getArgTypeCount() != 0) { // pattern with Number as beginning, such as "{0} d". 
// check to make sure that the timeUnit is consistent Formattable& temp = parsed[0]; if (temp.getType() == Formattable::kDouble) { tmpNumber = temp.getDouble(); } else if (temp.getType() == Formattable::kLong) { tmpNumber = temp.getLong(); } else { continue; } UnicodeString select = fPluralRules->select(tmpNumber); #ifdef TMUTFMT_DEBUG select.extract(0, select.length(), res, "UTF-8"); std::cout << "parse plural select count: " << res << "\n"; #endif if (*count != select) { continue; } } int32_t parseDistance = pos.getIndex() - oldPos; if (parseDistance > longestParseDistance) { if (pattern->getArgTypeCount() != 0) { resultNumber = tmpNumber; withNumberFormat = true; } else { withNumberFormat = false; } resultTimeUnit = i; newPos = pos.getIndex(); longestParseDistance = parseDistance; countOfLongestMatch = count; } } } } /* After find the longest match, parse the number. * Result number could be null for the pattern without number pattern. * such as unit pattern in Arabic. * When result number is null, use plural rule to set the number. */ if (withNumberFormat == false && longestParseDistance != 0) { // set the number using plurrual count if ( *countOfLongestMatch == PLURAL_COUNT_ZERO ) { resultNumber = 0; } else if ( *countOfLongestMatch == PLURAL_COUNT_ONE ) { resultNumber = 1; } else if ( *countOfLongestMatch == PLURAL_COUNT_TWO ) { resultNumber = 2; } else { // should not happen. // TODO: how to handle? resultNumber = 3; } } if (longestParseDistance == 0) { pos.setIndex(oldPos); pos.setErrorIndex(0); } else { UErrorCode status = U_ZERO_ERROR; TimeUnitAmount* tmutamt = new TimeUnitAmount(resultNumber, resultTimeUnit, status); if (U_SUCCESS(status)) { result.adoptObject(tmutamt); pos.setIndex(newPos); pos.setErrorIndex(-1); } else { pos.setIndex(oldPos); pos.setErrorIndex(0); } } }