void RuleBasedNumberFormat::parse(const UnicodeString& text, Formattable& result, ParsePosition& parsePosition) const { //TODO: We need a real fix. See #6895 / #6896 if (noParse) { // skip parsing parsePosition.setErrorIndex(0); return; } if (!ruleSets) { parsePosition.setErrorIndex(0); return; } UnicodeString workingText(text, parsePosition.getIndex()); ParsePosition workingPos(0); ParsePosition high_pp(0); Formattable high_result; for (NFRuleSet** p = ruleSets; *p; ++p) { NFRuleSet *rp = *p; if (rp->isPublic() && rp->isParseable()) { ParsePosition working_pp(0); Formattable working_result; rp->parse(workingText, working_pp, kMaxDouble, working_result); if (working_pp.getIndex() > high_pp.getIndex()) { high_pp = working_pp; high_result = working_result; if (high_pp.getIndex() == workingText.length()) { break; } } } } int32_t startIndex = parsePosition.getIndex(); parsePosition.setIndex(startIndex + high_pp.getIndex()); if (high_pp.getIndex() > 0) { parsePosition.setErrorIndex(-1); } else { int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0; parsePosition.setErrorIndex(startIndex + errorIndex); } result = high_result; if (result.getType() == Formattable::kDouble) { int32_t r = (int32_t)result.getDouble(); if ((double)r == result.getDouble()) { result.setLong(r); } } }
void ChoiceFormat::parse(const UnicodeString& text, Formattable& result, ParsePosition& status) const { // find the best number (defined as the one with the longest parse) int32_t start = status.getIndex(); int32_t furthest = start; double bestNumber = uprv_getNaN(); double tempNumber = 0.0; for (int i = 0; i < fCount; ++i) { int32_t len = fChoiceFormats[i].length(); if (text.compare(start, len, fChoiceFormats[i]) == 0) { status.setIndex(start + len); tempNumber = fChoiceLimits[i]; if (status.getIndex() > furthest) { furthest = status.getIndex(); bestNumber = tempNumber; if (furthest == text.length()) break; } } } status.setIndex(furthest); if (status.getIndex() == start) { status.setErrorIndex(furthest); } result.setDouble(bestNumber); }
UDate DateFormat::parse(const UnicodeString& text, ParsePosition& pos) const { UDate d = 0; // Error return UDate is 0 (the epoch) if (fCalendar != NULL) { Calendar* calClone = fCalendar->clone(); if (calClone != NULL) { int32_t start = pos.getIndex(); calClone->clear(); parse(text, *calClone, pos); if (pos.getIndex() != start) { UErrorCode ec = U_ZERO_ERROR; d = calClone->getTime(ec); if (U_FAILURE(ec)) { // We arrive here if fCalendar => calClone is non-lenient and // there is an out-of-range field. We don't know which field // was illegal so we set the error index to the start. pos.setIndex(start); pos.setErrorIndex(start); d = 0; } } delete calClone; } } return d; }
double ChoiceFormat::parseArgument( const MessagePattern &pattern, int32_t partIndex, const UnicodeString &source, ParsePosition &pos) { // find the best number (defined as the one with the longest parse) int32_t start = pos.getIndex(); int32_t furthest = start; double bestNumber = uprv_getNaN(); double tempNumber = 0.0; int32_t count = pattern.countParts(); while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) { tempNumber = pattern.getNumericValue(pattern.getPart(partIndex)); partIndex += 2; // skip the numeric part and ignore the ARG_SELECTOR int32_t msgLimit = pattern.getLimitPartIndex(partIndex); int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start); if (len >= 0) { int32_t newIndex = start + len; if (newIndex > furthest) { furthest = newIndex; bestNumber = tempNumber; if (furthest == source.length()) { break; } } } partIndex = msgLimit + 1; } if (furthest == start) { pos.setErrorIndex(start); } else { pos.setIndex(furthest); } return bestNumber; }
UDate DateFormat::parse(const UnicodeString& text, ParsePosition& pos) const { UDate d = 0; // Error return UDate is 0 (the epoch) if (fCalendar != NULL) { int32_t start = pos.getIndex(); // Parse may update TimeZone used by the calendar. TimeZone *tzsav = (TimeZone*)fCalendar->getTimeZone().clone(); fCalendar->clear(); parse(text, *fCalendar, pos); if (pos.getIndex() != start) { UErrorCode ec = U_ZERO_ERROR; d = fCalendar->getTime(ec); if (U_FAILURE(ec)) { // We arrive here if fCalendar is non-lenient and there // is an out-of-range field. We don't know which field // was illegal so we set the error index to the start. pos.setIndex(start); pos.setErrorIndex(start); d = 0; } } // Restore TimeZone fCalendar->adoptTimeZone(tzsav); } return d; }
void PluralFormat::parseObject(const UnicodeString& /*source*/, Formattable& /*result*/, ParsePosition& pos) const { // Parsing not supported. pos.setErrorIndex(pos.getIndex()); }
void SelectFormat::parseObject(const UnicodeString& /*source*/, Formattable& /*result*/, ParsePosition& pos) const { // TODO: not yet supported in icu4j and icu4c pos.setErrorIndex(pos.getIndex()); }
void RelativeDateFormat::parse( const UnicodeString& text, Calendar& cal, ParsePosition& pos) const { // Can the fDateFormat parse it? if(fDateFormat != NULL) { ParsePosition aPos(pos); fDateFormat->parse(text,cal,aPos); if((aPos.getIndex() != pos.getIndex()) && (aPos.getErrorIndex()==-1)) { pos=aPos; // copy the sub parse return; // parsed subfmt OK } } // Linear search the relative strings for(int n=0; n<fDatesLen; n++) { if(fDates[n].string != NULL && (0==text.compare(pos.getIndex(), fDates[n].len, fDates[n].string))) { UErrorCode status = U_ZERO_ERROR; // Set the calendar to now+offset cal.setTime(Calendar::getNow(),status); cal.add(UCAL_DATE,fDates[n].offset, status); if(U_FAILURE(status)) { // failure in setting calendar fields pos.setErrorIndex(pos.getIndex()+fDates[n].len); } else { pos.setIndex(pos.getIndex()+fDates[n].len); } return; } } // parse failed }
UDate DateFormat::parse(const UnicodeString& text, ParsePosition& pos) const { if (fCalendar != NULL) { int32_t start = pos.getIndex(); fCalendar->clear(); parse(text, *fCalendar, pos); if (pos.getIndex() != start) { UErrorCode ec = U_ZERO_ERROR; UDate d = fCalendar->getTime(ec); if (U_SUCCESS(ec)) { return d; // Successful function exit } // We arrive here if fCalendar is non-lenient and there // is an out-of-range field. We don't know which field // was illegal so we set the error index to the start. pos.setIndex(start); pos.setErrorIndex(start); } } return 0; // Error return UDate is 0 (the epoch) }
void RelativeDateFormat::parse( const UnicodeString& text, Calendar& cal, ParsePosition& pos) const { int32_t startIndex = pos.getIndex(); if (fDatePattern.isEmpty()) { // no date pattern, try parsing as time fDateTimeFormatter->applyPattern(fTimePattern); fDateTimeFormatter->parse(text,cal,pos); } else if (fTimePattern.isEmpty() || fCombinedFormat == NULL) { // no time pattern or way to combine, try parsing as date // first check whether text matches a relativeDayString UBool matchedRelative = FALSE; for (int n=0; n < fDatesLen && !matchedRelative; n++) { if (fDates[n].string != NULL && text.compare(startIndex, fDates[n].len, fDates[n].string) == 0) { // it matched, handle the relative day string UErrorCode status = U_ZERO_ERROR; matchedRelative = TRUE; // Set the calendar to now+offset cal.setTime(Calendar::getNow(),status); cal.add(UCAL_DATE,fDates[n].offset, status); if(U_FAILURE(status)) { // failure in setting calendar field, set offset to beginning of rel day string pos.setErrorIndex(startIndex); } else { pos.setIndex(startIndex + fDates[n].len); } } } if (!matchedRelative) { // just parse as normal date fDateTimeFormatter->applyPattern(fDatePattern); fDateTimeFormatter->parse(text,cal,pos); } } else { // Here we replace any relativeDayString in text with the equivalent date // formatted per fDatePattern, then parse text normally using the combined pattern. UnicodeString modifiedText(text); FieldPosition fPos; int32_t dateStart = 0, origDateLen = 0, modDateLen = 0; UErrorCode status = U_ZERO_ERROR; for (int n=0; n < fDatesLen; n++) { int32_t relativeStringOffset; if (fDates[n].string != NULL && (relativeStringOffset = modifiedText.indexOf(fDates[n].string, fDates[n].len, startIndex)) >= startIndex) { // it matched, replace the relative date with a real one for parsing UnicodeString dateString; Calendar * tempCal = cal.clone(); // Set the calendar to now+offset tempCal->setTime(Calendar::getNow(),status); tempCal->add(UCAL_DATE,fDates[n].offset, status); if(U_FAILURE(status)) { pos.setErrorIndex(startIndex); delete tempCal; return; } fDateTimeFormatter->applyPattern(fDatePattern); fDateTimeFormatter->format(*tempCal, dateString, fPos); dateStart = relativeStringOffset; origDateLen = fDates[n].len; modDateLen = dateString.length(); modifiedText.replace(dateStart, origDateLen, dateString); delete tempCal; break; } } UnicodeString combinedPattern; fCombinedFormat->format(fTimePattern, fDatePattern, combinedPattern, status); fDateTimeFormatter->applyPattern(combinedPattern); fDateTimeFormatter->parse(modifiedText,cal,pos); // Adjust offsets UBool noError = (pos.getErrorIndex() < 0); int32_t offset = (noError)? pos.getIndex(): pos.getErrorIndex(); if (offset >= dateStart + modDateLen) { // offset at or after the end of the replaced text, // correct by the difference between original and replacement offset -= (modDateLen - origDateLen); } else if (offset >= dateStart) { // offset in the replaced text, set it to the beginning of that text // (i.e. the beginning of the relative day string) offset = dateStart; } if (noError) { pos.setIndex(offset); } else { pos.setErrorIndex(offset); } } }
void TimeUnitFormat::parseObject(const UnicodeString& source, Formattable& result, ParsePosition& pos) const { Formattable resultNumber(0.0); UBool withNumberFormat = false; TimeUnit::UTimeUnitFields resultTimeUnit = TimeUnit::UTIMEUNIT_FIELD_COUNT; int32_t oldPos = pos.getIndex(); int32_t newPos = -1; int32_t longestParseDistance = 0; UnicodeString* countOfLongestMatch = NULL; #ifdef TMUTFMT_DEBUG char res[1000]; source.extract(0, source.length(), res, "UTF-8"); std::cout << "parse source: " << res << "\n"; #endif // parse by iterating through all available patterns // and looking for the longest match. for (TimeUnit::UTimeUnitFields i = TimeUnit::UTIMEUNIT_YEAR; i < TimeUnit::UTIMEUNIT_FIELD_COUNT; i = (TimeUnit::UTimeUnitFields)(i+1)) { Hashtable* countToPatterns = fTimeUnitToCountToPatterns[i]; int32_t elemPos = UHASH_FIRST; const UHashElement* elem = NULL; while ((elem = countToPatterns->nextElement(elemPos)) != NULL){ const UHashTok keyTok = elem->key; UnicodeString* count = (UnicodeString*)keyTok.pointer; #ifdef TMUTFMT_DEBUG count->extract(0, count->length(), res, "UTF-8"); std::cout << "parse plural count: " << res << "\n"; #endif const UHashTok valueTok = elem->value; // the value is a pair of MessageFormat* MessageFormat** patterns = (MessageFormat**)valueTok.pointer; for (UTimeUnitFormatStyle style = UTMUTFMT_FULL_STYLE; style < UTMUTFMT_FORMAT_STYLE_COUNT; style = (UTimeUnitFormatStyle)(style + 1)) { MessageFormat* pattern = patterns[style]; pos.setErrorIndex(-1); pos.setIndex(oldPos); // see if we can parse Formattable parsed; pattern->parseObject(source, parsed, pos); if (pos.getErrorIndex() != -1 || pos.getIndex() == oldPos) { continue; } #ifdef TMUTFMT_DEBUG std::cout << "parsed.getType: " << parsed.getType() << "\n"; #endif Formattable tmpNumber(0.0); if (pattern->getArgTypeCount() != 0) { Formattable& temp = parsed[0]; if (temp.getType() == Formattable::kString) { UnicodeString tmpString; UErrorCode pStatus = U_ZERO_ERROR; getNumberFormat().parse(temp.getString(tmpString), tmpNumber, pStatus); if (U_FAILURE(pStatus)) { continue; } } else if (temp.isNumeric()) { tmpNumber = temp; } else { continue; } } int32_t parseDistance = pos.getIndex() - oldPos; if (parseDistance > longestParseDistance) { if (pattern->getArgTypeCount() != 0) { resultNumber = tmpNumber; withNumberFormat = true; } else { withNumberFormat = false; } resultTimeUnit = i; newPos = pos.getIndex(); longestParseDistance = parseDistance; countOfLongestMatch = count; } } } } /* After find the longest match, parse the number. * Result number could be null for the pattern without number pattern. * such as unit pattern in Arabic. * When result number is null, use plural rule to set the number. */ if (withNumberFormat == false && longestParseDistance != 0) { // set the number using plurrual count if (0 == countOfLongestMatch->compare(PLURAL_COUNT_ZERO, 4)) { resultNumber = Formattable(0.0); } else if (0 == countOfLongestMatch->compare(PLURAL_COUNT_ONE, 3)) { resultNumber = Formattable(1.0); } else if (0 == countOfLongestMatch->compare(PLURAL_COUNT_TWO, 3)) { resultNumber = Formattable(2.0); } else { // should not happen. // TODO: how to handle? resultNumber = Formattable(3.0); } } if (longestParseDistance == 0) { pos.setIndex(oldPos); pos.setErrorIndex(0); } else { UErrorCode status = U_ZERO_ERROR; TimeUnitAmount* tmutamt = new TimeUnitAmount(resultNumber, resultTimeUnit, status); if (U_SUCCESS(status)) { result.adoptObject(tmutamt); pos.setIndex(newPos); pos.setErrorIndex(-1); } else { pos.setIndex(oldPos); pos.setErrorIndex(0); } } }
void TimeUnitFormat::parseObject(const UnicodeString& source, Formattable& result, ParsePosition& pos) const { double resultNumber = -1; UBool withNumberFormat = false; TimeUnit::UTimeUnitFields resultTimeUnit = TimeUnit::UTIMEUNIT_FIELD_COUNT; int32_t oldPos = pos.getIndex(); int32_t newPos = -1; int32_t longestParseDistance = 0; UnicodeString* countOfLongestMatch = NULL; #ifdef TMUTFMT_DEBUG char res[1000]; source.extract(0, source.length(), res, "UTF-8"); std::cout << "parse source: " << res << "\n"; #endif // parse by iterating through all available patterns // and looking for the longest match. for (TimeUnit::UTimeUnitFields i = TimeUnit::UTIMEUNIT_YEAR; i < TimeUnit::UTIMEUNIT_FIELD_COUNT; i = (TimeUnit::UTimeUnitFields)(i+1)) { Hashtable* countToPatterns = fTimeUnitToCountToPatterns[i]; int32_t elemPos = -1; const UHashElement* elem = NULL; while ((elem = countToPatterns->nextElement(elemPos)) != NULL){ const UHashTok keyTok = elem->key; UnicodeString* count = (UnicodeString*)keyTok.pointer; #ifdef TMUTFMT_DEBUG count->extract(0, count->length(), res, "UTF-8"); std::cout << "parse plural count: " << res << "\n"; #endif const UHashTok valueTok = elem->value; // the value is a pair of MessageFormat* MessageFormat** patterns = (MessageFormat**)valueTok.pointer; for (EStyle style = kFull; style < kTotal; style = (EStyle)(style + 1)) { MessageFormat* pattern = patterns[style]; pos.setErrorIndex(-1); pos.setIndex(oldPos); // see if we can parse Formattable parsed; pattern->parseObject(source, parsed, pos); if (pos.getErrorIndex() != -1 || pos.getIndex() == oldPos) { continue; } #ifdef TMUTFMT_DEBUG std::cout << "parsed.getType: " << parsed.getType() << "\n"; #endif double tmpNumber = 0; if (pattern->getArgTypeCount() != 0) { // pattern with Number as beginning, such as "{0} d". // check to make sure that the timeUnit is consistent Formattable& temp = parsed[0]; if (temp.getType() == Formattable::kDouble) { tmpNumber = temp.getDouble(); } else if (temp.getType() == Formattable::kLong) { tmpNumber = temp.getLong(); } else { continue; } UnicodeString select = fPluralRules->select(tmpNumber); #ifdef TMUTFMT_DEBUG select.extract(0, select.length(), res, "UTF-8"); std::cout << "parse plural select count: " << res << "\n"; #endif if (*count != select) { continue; } } int32_t parseDistance = pos.getIndex() - oldPos; if (parseDistance > longestParseDistance) { if (pattern->getArgTypeCount() != 0) { resultNumber = tmpNumber; withNumberFormat = true; } else { withNumberFormat = false; } resultTimeUnit = i; newPos = pos.getIndex(); longestParseDistance = parseDistance; countOfLongestMatch = count; } } } } /* After find the longest match, parse the number. * Result number could be null for the pattern without number pattern. * such as unit pattern in Arabic. * When result number is null, use plural rule to set the number. */ if (withNumberFormat == false && longestParseDistance != 0) { // set the number using plurrual count if ( *countOfLongestMatch == PLURAL_COUNT_ZERO ) { resultNumber = 0; } else if ( *countOfLongestMatch == PLURAL_COUNT_ONE ) { resultNumber = 1; } else if ( *countOfLongestMatch == PLURAL_COUNT_TWO ) { resultNumber = 2; } else { // should not happen. // TODO: how to handle? resultNumber = 3; } } if (longestParseDistance == 0) { pos.setIndex(oldPos); pos.setErrorIndex(0); } else { UErrorCode status = U_ZERO_ERROR; TimeUnitAmount* tmutamt = new TimeUnitAmount(resultNumber, resultTimeUnit, status); if (U_SUCCESS(status)) { result.adoptObject(tmutamt); pos.setIndex(newPos); pos.setErrorIndex(-1); } else { pos.setIndex(oldPos); pos.setErrorIndex(0); } } }