void RelativeDateFormat::parse( const UnicodeString& text, Calendar& cal, ParsePosition& pos) const { int32_t startIndex = pos.getIndex(); if (fDatePattern.isEmpty()) { // no date pattern, try parsing as time fDateTimeFormatter->applyPattern(fTimePattern); fDateTimeFormatter->parse(text,cal,pos); } else if (fTimePattern.isEmpty() || fCombinedFormat == NULL) { // no time pattern or way to combine, try parsing as date // first check whether text matches a relativeDayString UBool matchedRelative = FALSE; for (int n=0; n < fDatesLen && !matchedRelative; n++) { if (fDates[n].string != NULL && text.compare(startIndex, fDates[n].len, fDates[n].string) == 0) { // it matched, handle the relative day string UErrorCode status = U_ZERO_ERROR; matchedRelative = TRUE; // Set the calendar to now+offset cal.setTime(Calendar::getNow(),status); cal.add(UCAL_DATE,fDates[n].offset, status); if(U_FAILURE(status)) { // failure in setting calendar field, set offset to beginning of rel day string pos.setErrorIndex(startIndex); } else { pos.setIndex(startIndex + fDates[n].len); } } } if (!matchedRelative) { // just parse as normal date fDateTimeFormatter->applyPattern(fDatePattern); fDateTimeFormatter->parse(text,cal,pos); } } else { // Here we replace any relativeDayString in text with the equivalent date // formatted per fDatePattern, then parse text normally using the combined pattern. UnicodeString modifiedText(text); FieldPosition fPos; int32_t dateStart = 0, origDateLen = 0, modDateLen = 0; UErrorCode status = U_ZERO_ERROR; for (int n=0; n < fDatesLen; n++) { int32_t relativeStringOffset; if (fDates[n].string != NULL && (relativeStringOffset = modifiedText.indexOf(fDates[n].string, fDates[n].len, startIndex)) >= startIndex) { // it matched, replace the relative date with a real one for parsing UnicodeString dateString; Calendar * tempCal = cal.clone(); // Set the calendar to now+offset tempCal->setTime(Calendar::getNow(),status); tempCal->add(UCAL_DATE,fDates[n].offset, status); if(U_FAILURE(status)) { pos.setErrorIndex(startIndex); delete tempCal; return; } fDateTimeFormatter->applyPattern(fDatePattern); fDateTimeFormatter->format(*tempCal, dateString, fPos); dateStart = relativeStringOffset; origDateLen = fDates[n].len; modDateLen = dateString.length(); modifiedText.replace(dateStart, origDateLen, dateString); delete tempCal; break; } } UnicodeString combinedPattern; fCombinedFormat->format(fTimePattern, fDatePattern, combinedPattern, status); fDateTimeFormatter->applyPattern(combinedPattern); fDateTimeFormatter->parse(modifiedText,cal,pos); // Adjust offsets UBool noError = (pos.getErrorIndex() < 0); int32_t offset = (noError)? pos.getIndex(): pos.getErrorIndex(); if (offset >= dateStart + modDateLen) { // offset at or after the end of the replaced text, // correct by the difference between original and replacement offset -= (modDateLen - origDateLen); } else if (offset >= dateStart) { // offset in the replaced text, set it to the beginning of that text // (i.e. the beginning of the relative day string) offset = dateStart; } if (noError) { pos.setIndex(offset); } else { pos.setErrorIndex(offset); } } }
void TimeUnitFormat::parseObject(const UnicodeString& source, Formattable& result, ParsePosition& pos) const { Formattable resultNumber(0.0); UBool withNumberFormat = false; TimeUnit::UTimeUnitFields resultTimeUnit = TimeUnit::UTIMEUNIT_FIELD_COUNT; int32_t oldPos = pos.getIndex(); int32_t newPos = -1; int32_t longestParseDistance = 0; UnicodeString* countOfLongestMatch = NULL; #ifdef TMUTFMT_DEBUG char res[1000]; source.extract(0, source.length(), res, "UTF-8"); std::cout << "parse source: " << res << "\n"; #endif // parse by iterating through all available patterns // and looking for the longest match. for (TimeUnit::UTimeUnitFields i = TimeUnit::UTIMEUNIT_YEAR; i < TimeUnit::UTIMEUNIT_FIELD_COUNT; i = (TimeUnit::UTimeUnitFields)(i+1)) { Hashtable* countToPatterns = fTimeUnitToCountToPatterns[i]; int32_t elemPos = UHASH_FIRST; const UHashElement* elem = NULL; while ((elem = countToPatterns->nextElement(elemPos)) != NULL){ const UHashTok keyTok = elem->key; UnicodeString* count = (UnicodeString*)keyTok.pointer; #ifdef TMUTFMT_DEBUG count->extract(0, count->length(), res, "UTF-8"); std::cout << "parse plural count: " << res << "\n"; #endif const UHashTok valueTok = elem->value; // the value is a pair of MessageFormat* MessageFormat** patterns = (MessageFormat**)valueTok.pointer; for (UTimeUnitFormatStyle style = UTMUTFMT_FULL_STYLE; style < UTMUTFMT_FORMAT_STYLE_COUNT; style = (UTimeUnitFormatStyle)(style + 1)) { MessageFormat* pattern = patterns[style]; pos.setErrorIndex(-1); pos.setIndex(oldPos); // see if we can parse Formattable parsed; pattern->parseObject(source, parsed, pos); if (pos.getErrorIndex() != -1 || pos.getIndex() == oldPos) { continue; } #ifdef TMUTFMT_DEBUG std::cout << "parsed.getType: " << parsed.getType() << "\n"; #endif Formattable tmpNumber(0.0); if (pattern->getArgTypeCount() != 0) { Formattable& temp = parsed[0]; if (temp.getType() == Formattable::kString) { UnicodeString tmpString; UErrorCode pStatus = U_ZERO_ERROR; getNumberFormat().parse(temp.getString(tmpString), tmpNumber, pStatus); if (U_FAILURE(pStatus)) { continue; } } else if (temp.isNumeric()) { tmpNumber = temp; } else { continue; } } int32_t parseDistance = pos.getIndex() - oldPos; if (parseDistance > longestParseDistance) { if (pattern->getArgTypeCount() != 0) { resultNumber = tmpNumber; withNumberFormat = true; } else { withNumberFormat = false; } resultTimeUnit = i; newPos = pos.getIndex(); longestParseDistance = parseDistance; countOfLongestMatch = count; } } } } /* After find the longest match, parse the number. * Result number could be null for the pattern without number pattern. * such as unit pattern in Arabic. * When result number is null, use plural rule to set the number. */ if (withNumberFormat == false && longestParseDistance != 0) { // set the number using plurrual count if (0 == countOfLongestMatch->compare(PLURAL_COUNT_ZERO, 4)) { resultNumber = Formattable(0.0); } else if (0 == countOfLongestMatch->compare(PLURAL_COUNT_ONE, 3)) { resultNumber = Formattable(1.0); } else if (0 == countOfLongestMatch->compare(PLURAL_COUNT_TWO, 3)) { resultNumber = Formattable(2.0); } else { // should not happen. // TODO: how to handle? resultNumber = Formattable(3.0); } } if (longestParseDistance == 0) { pos.setIndex(oldPos); pos.setErrorIndex(0); } else { UErrorCode status = U_ZERO_ERROR; TimeUnitAmount* tmutamt = new TimeUnitAmount(resultNumber, resultTimeUnit, status); if (U_SUCCESS(status)) { result.adoptObject(tmutamt); pos.setIndex(newPos); pos.setErrorIndex(-1); } else { pos.setIndex(oldPos); pos.setErrorIndex(0); } } }
void TimeUnitFormat::parseObject(const UnicodeString& source, Formattable& result, ParsePosition& pos) const { double resultNumber = -1; UBool withNumberFormat = false; TimeUnit::UTimeUnitFields resultTimeUnit = TimeUnit::UTIMEUNIT_FIELD_COUNT; int32_t oldPos = pos.getIndex(); int32_t newPos = -1; int32_t longestParseDistance = 0; UnicodeString* countOfLongestMatch = NULL; #ifdef TMUTFMT_DEBUG char res[1000]; source.extract(0, source.length(), res, "UTF-8"); std::cout << "parse source: " << res << "\n"; #endif // parse by iterating through all available patterns // and looking for the longest match. for (TimeUnit::UTimeUnitFields i = TimeUnit::UTIMEUNIT_YEAR; i < TimeUnit::UTIMEUNIT_FIELD_COUNT; i = (TimeUnit::UTimeUnitFields)(i+1)) { Hashtable* countToPatterns = fTimeUnitToCountToPatterns[i]; int32_t elemPos = -1; const UHashElement* elem = NULL; while ((elem = countToPatterns->nextElement(elemPos)) != NULL){ const UHashTok keyTok = elem->key; UnicodeString* count = (UnicodeString*)keyTok.pointer; #ifdef TMUTFMT_DEBUG count->extract(0, count->length(), res, "UTF-8"); std::cout << "parse plural count: " << res << "\n"; #endif const UHashTok valueTok = elem->value; // the value is a pair of MessageFormat* MessageFormat** patterns = (MessageFormat**)valueTok.pointer; for (EStyle style = kFull; style < kTotal; style = (EStyle)(style + 1)) { MessageFormat* pattern = patterns[style]; pos.setErrorIndex(-1); pos.setIndex(oldPos); // see if we can parse Formattable parsed; pattern->parseObject(source, parsed, pos); if (pos.getErrorIndex() != -1 || pos.getIndex() == oldPos) { continue; } #ifdef TMUTFMT_DEBUG std::cout << "parsed.getType: " << parsed.getType() << "\n"; #endif double tmpNumber = 0; if (pattern->getArgTypeCount() != 0) { // pattern with Number as beginning, such as "{0} d". // check to make sure that the timeUnit is consistent Formattable& temp = parsed[0]; if (temp.getType() == Formattable::kDouble) { tmpNumber = temp.getDouble(); } else if (temp.getType() == Formattable::kLong) { tmpNumber = temp.getLong(); } else { continue; } UnicodeString select = fPluralRules->select(tmpNumber); #ifdef TMUTFMT_DEBUG select.extract(0, select.length(), res, "UTF-8"); std::cout << "parse plural select count: " << res << "\n"; #endif if (*count != select) { continue; } } int32_t parseDistance = pos.getIndex() - oldPos; if (parseDistance > longestParseDistance) { if (pattern->getArgTypeCount() != 0) { resultNumber = tmpNumber; withNumberFormat = true; } else { withNumberFormat = false; } resultTimeUnit = i; newPos = pos.getIndex(); longestParseDistance = parseDistance; countOfLongestMatch = count; } } } } /* After find the longest match, parse the number. * Result number could be null for the pattern without number pattern. * such as unit pattern in Arabic. * When result number is null, use plural rule to set the number. */ if (withNumberFormat == false && longestParseDistance != 0) { // set the number using plurrual count if ( *countOfLongestMatch == PLURAL_COUNT_ZERO ) { resultNumber = 0; } else if ( *countOfLongestMatch == PLURAL_COUNT_ONE ) { resultNumber = 1; } else if ( *countOfLongestMatch == PLURAL_COUNT_TWO ) { resultNumber = 2; } else { // should not happen. // TODO: how to handle? resultNumber = 3; } } if (longestParseDistance == 0) { pos.setIndex(oldPos); pos.setErrorIndex(0); } else { UErrorCode status = U_ZERO_ERROR; TimeUnitAmount* tmutamt = new TimeUnitAmount(resultNumber, resultTimeUnit, status); if (U_SUCCESS(status)) { result.adoptObject(tmutamt); pos.setIndex(newPos); pos.setErrorIndex(-1); } else { pos.setIndex(oldPos); pos.setErrorIndex(0); } } }
static jobject parseRBNFImpl(JNIEnv *env, jclass clazz, jint addr, jstring text, jobject position, jboolean lenient) { // LOGI("ENTER parseRBNFImpl"); const char * parsePositionClassName = "java/text/ParsePosition"; const char * longClassName = "java/lang/Long"; const char * doubleClassName = "java/lang/Double"; UErrorCode status = U_ZERO_ERROR; UNumberFormat *fmt = (UNumberFormat *)(int)addr; jchar *str = (UChar *)env->GetStringChars(text, NULL); int strlength = env->GetStringLength(text); jclass parsePositionClass = env->FindClass(parsePositionClassName); jclass longClass = env->FindClass(longClassName); jclass doubleClass = env->FindClass(doubleClassName); jmethodID getIndexMethodID = env->GetMethodID(parsePositionClass, "getIndex", "()I"); jmethodID setIndexMethodID = env->GetMethodID(parsePositionClass, "setIndex", "(I)V"); jmethodID setErrorIndexMethodID = env->GetMethodID(parsePositionClass, "setErrorIndex", "(I)V"); jmethodID longInitMethodID = env->GetMethodID(longClass, "<init>", "(J)V"); jmethodID dblInitMethodID = env->GetMethodID(doubleClass, "<init>", "(D)V"); int parsePos = env->CallIntMethod(position, getIndexMethodID, NULL); // make sure the ParsePosition is valid. Actually icu4c would parse a number // correctly even if the parsePosition is set to -1, but since the RI fails // for that case we have to fail too if(parsePos < 0 || parsePos > strlength) { return NULL; } Formattable res; const UnicodeString src((UChar*)str, strlength, strlength); ParsePosition pp; pp.setIndex(parsePos); if(lenient) { unum_setAttribute(fmt, UNUM_LENIENT_PARSE, JNI_TRUE); } ((const NumberFormat*)fmt)->parse(src, res, pp); if(lenient) { unum_setAttribute(fmt, UNUM_LENIENT_PARSE, JNI_FALSE); } env->ReleaseStringChars(text, str); if(pp.getErrorIndex() == -1) { parsePos = pp.getIndex(); } else { env->CallVoidMethod(position, setErrorIndexMethodID, (jint) pp.getErrorIndex()); return NULL; } Formattable::Type numType; numType = res.getType(); UErrorCode fmtStatus; double resultDouble; long resultLong; int64_t resultInt64; switch(numType) { case Formattable::kDouble: resultDouble = res.getDouble(); env->CallVoidMethod(position, setIndexMethodID, (jint) parsePos); return env->NewObject(doubleClass, dblInitMethodID, (jdouble) resultDouble); case Formattable::kLong: resultLong = res.getLong(); env->CallVoidMethod(position, setIndexMethodID, (jint) parsePos); return env->NewObject(longClass, longInitMethodID, (jlong) resultLong); case Formattable::kInt64: resultInt64 = res.getInt64(); env->CallVoidMethod(position, setIndexMethodID, (jint) parsePos); return env->NewObject(longClass, longInitMethodID, (jlong) resultInt64); default: break; } return NULL; }
/** * Parse date-time objects * * @param str * @param format * @param tz * @param lenient * @param locale * * @return character vector * * @version 0.5-1 (Marek Gagolewski, 2015-01-08) * @version 0.5-1 (Marek Gagolewski, 2015-01-11) lenient arg added * @version 0.5-1 (Marek Gagolewski, 2015-02-22) use tz * @version 0.5-1 (Marek Gagolewski, 2015-03-01) set tzone attrib on retval */ SEXP stri_datetime_parse(SEXP str, SEXP format, SEXP lenient, SEXP tz, SEXP locale) { PROTECT(str = stri_prepare_arg_string(str, "str")); const char* locale_val = stri__prepare_arg_locale(locale, "locale", true); const char* format_val = stri__prepare_arg_string_1_notNA(format, "format"); bool lenient_val = stri__prepare_arg_logical_1_notNA(lenient, "lenient"); if (!isNull(tz)) PROTECT(tz = stri_prepare_arg_string_1(tz, "tz")); else PROTECT(tz); /* needed to set tzone attrib */ // "format" may be one of: const char* format_opts[] = { "date_full", "date_long", "date_medium", "date_short", "date_relative_full", "date_relative_long", "date_relative_medium", "date_relative_short", "time_full", "time_long", "time_medium", "time_short", "time_relative_full", "time_relative_long", "time_relative_medium", "time_relative_short", "datetime_full", "datetime_long", "datetime_medium", "datetime_short", "datetime_relative_full", "datetime_relative_long", "datetime_relative_medium", "datetime_relative_short", NULL}; int format_cur = stri__match_arg(format_val, format_opts); TimeZone* tz_val = stri__prepare_arg_timezone(tz, "tz", true/*allowdefault*/); Calendar* cal = NULL; DateFormat* fmt = NULL; STRI__ERROR_HANDLER_BEGIN(2) R_len_t vectorize_length = LENGTH(str); StriContainerUTF16 str_cont(str, vectorize_length); UnicodeString format_str(format_val); UErrorCode status = U_ZERO_ERROR; if (format_cur >= 0) { DateFormat::EStyle style = DateFormat::kNone; switch (format_cur % 8) { case 0: style = DateFormat::kFull; break; case 1: style = DateFormat::kLong; break; case 2: style = DateFormat::kMedium; break; case 3: style = DateFormat::kShort; break; case 4: style = DateFormat::kFullRelative; break; case 5: style = DateFormat::kLongRelative; break; case 6: style = DateFormat::kMediumRelative; break; case 7: style = DateFormat::kShortRelative; break; default: style = DateFormat::kNone; break; } /* ICU 54.1: Relative time styles are not currently supported. */ switch (format_cur / 8) { case 0: fmt = DateFormat::createDateInstance(style, Locale::createFromName(locale_val)); break; case 1: fmt = DateFormat::createTimeInstance((DateFormat::EStyle)(style & ~DateFormat::kRelative), Locale::createFromName(locale_val)); break; case 2: fmt = DateFormat::createDateTimeInstance(style, (DateFormat::EStyle)(style & ~DateFormat::kRelative), Locale::createFromName(locale_val)); break; default: fmt = NULL; break; } } else fmt = new SimpleDateFormat(format_str, Locale::createFromName(locale_val), status); STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */}) status = U_ZERO_ERROR; cal = Calendar::createInstance(locale_val, status); STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */}) cal->adoptTimeZone(tz_val); tz_val = NULL; /* The Calendar takes ownership of the TimeZone. */ cal->setLenient(lenient_val); SEXP ret; STRI__PROTECT(ret = Rf_allocVector(REALSXP, vectorize_length)); for (R_len_t i=0; i<vectorize_length; ++i) { if (str_cont.isNA(i)) { REAL(ret)[i] = NA_REAL; continue; } status = U_ZERO_ERROR; ParsePosition pos; fmt->parse(str_cont.get(i), *cal, pos); if (pos.getErrorIndex() >= 0) REAL(ret)[i] = NA_REAL; else { status = U_ZERO_ERROR; REAL(ret)[i] = ((double)cal->getTime(status))/1000.0; if (U_FAILURE(status)) REAL(ret)[i] = NA_REAL; } } if (!isNull(tz)) Rf_setAttrib(ret, Rf_ScalarString(Rf_mkChar("tzone")), tz); stri__set_class_POSIXct(ret); if (tz_val) { delete tz_val; tz_val = NULL; } if (fmt) { delete fmt; fmt = NULL; } if (cal) { delete cal; cal = NULL; } STRI__UNPROTECT_ALL return ret; STRI__ERROR_HANDLER_END({ if (tz_val) { delete tz_val; tz_val = NULL; } if (fmt) { delete fmt; fmt = NULL; } if (cal) { delete cal; cal = NULL; } }) }