Exemple #1
0
void RelativeDateFormat::parse( const UnicodeString& text,
                    Calendar& cal,
                    ParsePosition& pos) const {

    int32_t startIndex = pos.getIndex();
    if (fDatePattern.isEmpty()) {
        // no date pattern, try parsing as time
        fDateTimeFormatter->applyPattern(fTimePattern);
        fDateTimeFormatter->parse(text,cal,pos);
    } else if (fTimePattern.isEmpty() || fCombinedFormat == NULL) {
        // no time pattern or way to combine, try parsing as date
        // first check whether text matches a relativeDayString
        UBool matchedRelative = FALSE;
        for (int n=0; n < fDatesLen && !matchedRelative; n++) {
            if (fDates[n].string != NULL &&
                    text.compare(startIndex, fDates[n].len, fDates[n].string) == 0) {
                // it matched, handle the relative day string
                UErrorCode status = U_ZERO_ERROR;
                matchedRelative = TRUE;

                // Set the calendar to now+offset
                cal.setTime(Calendar::getNow(),status);
                cal.add(UCAL_DATE,fDates[n].offset, status);

                if(U_FAILURE(status)) { 
                    // failure in setting calendar field, set offset to beginning of rel day string
                    pos.setErrorIndex(startIndex);
                } else {
                    pos.setIndex(startIndex + fDates[n].len);
                }
            }
        }
        if (!matchedRelative) {
            // just parse as normal date
            fDateTimeFormatter->applyPattern(fDatePattern);
            fDateTimeFormatter->parse(text,cal,pos);
        }
    } else {
        // Here we replace any relativeDayString in text with the equivalent date
        // formatted per fDatePattern, then parse text normally using the combined pattern.
        UnicodeString modifiedText(text);
        FieldPosition fPos;
        int32_t dateStart = 0, origDateLen = 0, modDateLen = 0;
        UErrorCode status = U_ZERO_ERROR;
        for (int n=0; n < fDatesLen; n++) {
            int32_t relativeStringOffset;
            if (fDates[n].string != NULL &&
                    (relativeStringOffset = modifiedText.indexOf(fDates[n].string, fDates[n].len, startIndex)) >= startIndex) {
                // it matched, replace the relative date with a real one for parsing
                UnicodeString dateString;
                Calendar * tempCal = cal.clone();

                // Set the calendar to now+offset
                tempCal->setTime(Calendar::getNow(),status);
                tempCal->add(UCAL_DATE,fDates[n].offset, status);
                if(U_FAILURE(status)) { 
                    pos.setErrorIndex(startIndex);
                    delete tempCal;
                    return;
                }

                fDateTimeFormatter->applyPattern(fDatePattern);
                fDateTimeFormatter->format(*tempCal, dateString, fPos);
                dateStart = relativeStringOffset;
                origDateLen = fDates[n].len;
                modDateLen = dateString.length();
                modifiedText.replace(dateStart, origDateLen, dateString);
                delete tempCal;
                break;
            }
        }
        UnicodeString combinedPattern;
        fCombinedFormat->format(fTimePattern, fDatePattern, combinedPattern, status);
        fDateTimeFormatter->applyPattern(combinedPattern);
        fDateTimeFormatter->parse(modifiedText,cal,pos);

        // Adjust offsets
        UBool noError = (pos.getErrorIndex() < 0);
        int32_t offset = (noError)? pos.getIndex(): pos.getErrorIndex();
        if (offset >= dateStart + modDateLen) {
            // offset at or after the end of the replaced text,
            // correct by the difference between original and replacement
            offset -= (modDateLen - origDateLen);
        } else if (offset >= dateStart) {
            // offset in the replaced text, set it to the beginning of that text
            // (i.e. the beginning of the relative day string)
            offset = dateStart;
        }
        if (noError) {
            pos.setIndex(offset);
        } else {
            pos.setErrorIndex(offset);
        }
    }
}
Exemple #2
0
void
TimeUnitFormat::parseObject(const UnicodeString& source,
                            Formattable& result,
                            ParsePosition& pos) const {
    Formattable resultNumber(0.0);
    UBool withNumberFormat = false;
    TimeUnit::UTimeUnitFields resultTimeUnit = TimeUnit::UTIMEUNIT_FIELD_COUNT;
    int32_t oldPos = pos.getIndex();
    int32_t newPos = -1;
    int32_t longestParseDistance = 0;
    UnicodeString* countOfLongestMatch = NULL;
#ifdef TMUTFMT_DEBUG
    char res[1000];
    source.extract(0, source.length(), res, "UTF-8");
    std::cout << "parse source: " << res << "\n";
#endif
    // parse by iterating through all available patterns
    // and looking for the longest match.
    for (TimeUnit::UTimeUnitFields i = TimeUnit::UTIMEUNIT_YEAR;
         i < TimeUnit::UTIMEUNIT_FIELD_COUNT;
         i = (TimeUnit::UTimeUnitFields)(i+1)) {
        Hashtable* countToPatterns = fTimeUnitToCountToPatterns[i];
        int32_t elemPos = UHASH_FIRST;
        const UHashElement* elem = NULL;
        while ((elem = countToPatterns->nextElement(elemPos)) != NULL){
            const UHashTok keyTok = elem->key;
            UnicodeString* count = (UnicodeString*)keyTok.pointer;
#ifdef TMUTFMT_DEBUG
            count->extract(0, count->length(), res, "UTF-8");
            std::cout << "parse plural count: " << res << "\n";
#endif
            const UHashTok valueTok = elem->value;
            // the value is a pair of MessageFormat*
            MessageFormat** patterns = (MessageFormat**)valueTok.pointer;
            for (UTimeUnitFormatStyle style = UTMUTFMT_FULL_STYLE; style < UTMUTFMT_FORMAT_STYLE_COUNT;
                 style = (UTimeUnitFormatStyle)(style + 1)) {
                MessageFormat* pattern = patterns[style];
                pos.setErrorIndex(-1);
                pos.setIndex(oldPos);
                // see if we can parse
                Formattable parsed;
                pattern->parseObject(source, parsed, pos);
                if (pos.getErrorIndex() != -1 || pos.getIndex() == oldPos) {
                    continue;
                }
    #ifdef TMUTFMT_DEBUG
                std::cout << "parsed.getType: " << parsed.getType() << "\n";
    #endif
                Formattable tmpNumber(0.0);
                if (pattern->getArgTypeCount() != 0) {
                    Formattable& temp = parsed[0];
                    if (temp.getType() == Formattable::kString) {
                        UnicodeString tmpString;
                        UErrorCode pStatus = U_ZERO_ERROR;
                        getNumberFormat().parse(temp.getString(tmpString), tmpNumber, pStatus);
                        if (U_FAILURE(pStatus)) {
                            continue;
                        }
                    } else if (temp.isNumeric()) {
                        tmpNumber = temp;
                    } else {
                        continue;
                    }
                }
                int32_t parseDistance = pos.getIndex() - oldPos;
                if (parseDistance > longestParseDistance) {
                    if (pattern->getArgTypeCount() != 0) {
                        resultNumber = tmpNumber;
                        withNumberFormat = true;
                    } else {
                        withNumberFormat = false;
                    }
                    resultTimeUnit = i;
                    newPos = pos.getIndex();
                    longestParseDistance = parseDistance;
                    countOfLongestMatch = count;
                }
            }
        }
    }
    /* After find the longest match, parse the number.
     * Result number could be null for the pattern without number pattern.
     * such as unit pattern in Arabic.
     * When result number is null, use plural rule to set the number.
     */
    if (withNumberFormat == false && longestParseDistance != 0) {
        // set the number using plurrual count
        if (0 == countOfLongestMatch->compare(PLURAL_COUNT_ZERO, 4)) {
            resultNumber = Formattable(0.0);
        } else if (0 == countOfLongestMatch->compare(PLURAL_COUNT_ONE, 3)) {
            resultNumber = Formattable(1.0);
        } else if (0 == countOfLongestMatch->compare(PLURAL_COUNT_TWO, 3)) {
            resultNumber = Formattable(2.0);
        } else {
            // should not happen.
            // TODO: how to handle?
            resultNumber = Formattable(3.0);
        }
    }
    if (longestParseDistance == 0) {
        pos.setIndex(oldPos);
        pos.setErrorIndex(0);
    } else {
        UErrorCode status = U_ZERO_ERROR;
        TimeUnitAmount* tmutamt = new TimeUnitAmount(resultNumber, resultTimeUnit, status);
        if (U_SUCCESS(status)) {
            result.adoptObject(tmutamt);
            pos.setIndex(newPos);
            pos.setErrorIndex(-1);
        } else {
            pos.setIndex(oldPos);
            pos.setErrorIndex(0);
        }
    }
}
void 
TimeUnitFormat::parseObject(const UnicodeString& source, 
                            Formattable& result,
                            ParsePosition& pos) const {
    double resultNumber = -1; 
    UBool withNumberFormat = false;
    TimeUnit::UTimeUnitFields resultTimeUnit = TimeUnit::UTIMEUNIT_FIELD_COUNT;
    int32_t oldPos = pos.getIndex();
    int32_t newPos = -1;
    int32_t longestParseDistance = 0;
    UnicodeString* countOfLongestMatch = NULL;
#ifdef TMUTFMT_DEBUG
    char res[1000];
    source.extract(0, source.length(), res, "UTF-8");
    std::cout << "parse source: " << res << "\n";           
#endif
    // parse by iterating through all available patterns
    // and looking for the longest match.
    for (TimeUnit::UTimeUnitFields i = TimeUnit::UTIMEUNIT_YEAR;
         i < TimeUnit::UTIMEUNIT_FIELD_COUNT;
         i = (TimeUnit::UTimeUnitFields)(i+1)) {
        Hashtable* countToPatterns = fTimeUnitToCountToPatterns[i];
        int32_t elemPos = -1;
        const UHashElement* elem = NULL;
        while ((elem = countToPatterns->nextElement(elemPos)) != NULL){
            const UHashTok keyTok = elem->key;
            UnicodeString* count = (UnicodeString*)keyTok.pointer;
#ifdef TMUTFMT_DEBUG
            count->extract(0, count->length(), res, "UTF-8");
            std::cout << "parse plural count: " << res << "\n";           
#endif
            const UHashTok valueTok = elem->value;
            // the value is a pair of MessageFormat*
            MessageFormat** patterns = (MessageFormat**)valueTok.pointer;
            for (EStyle style = kFull; style < kTotal; style = (EStyle)(style + 1)) {
                MessageFormat* pattern = patterns[style];
                pos.setErrorIndex(-1);
                pos.setIndex(oldPos);
                // see if we can parse
                Formattable parsed;
                pattern->parseObject(source, parsed, pos);
                if (pos.getErrorIndex() != -1 || pos.getIndex() == oldPos) {
                    continue;
                }
    #ifdef TMUTFMT_DEBUG
                std::cout << "parsed.getType: " << parsed.getType() << "\n";
    #endif
                double tmpNumber = 0;
                if (pattern->getArgTypeCount() != 0) {
                    // pattern with Number as beginning, such as "{0} d".
                    // check to make sure that the timeUnit is consistent
                    Formattable& temp = parsed[0];
                    if (temp.getType() == Formattable::kDouble) {
                        tmpNumber = temp.getDouble();
                    } else if (temp.getType() == Formattable::kLong) {
                        tmpNumber = temp.getLong();
                    } else {
                        continue;
                    }
                    UnicodeString select = fPluralRules->select(tmpNumber);
    #ifdef TMUTFMT_DEBUG
                    select.extract(0, select.length(), res, "UTF-8");
                    std::cout << "parse plural select count: " << res << "\n"; 
    #endif
                    if (*count != select) {
                        continue;
                    }
                }
                int32_t parseDistance = pos.getIndex() - oldPos;
                if (parseDistance > longestParseDistance) {
                    if (pattern->getArgTypeCount() != 0) {
                        resultNumber = tmpNumber;
                        withNumberFormat = true;
                    } else {
                        withNumberFormat = false;
                    }
                    resultTimeUnit = i;
                    newPos = pos.getIndex();
                    longestParseDistance = parseDistance;
                    countOfLongestMatch = count;
                }
            }
        }
    }
    /* After find the longest match, parse the number.
     * Result number could be null for the pattern without number pattern.
     * such as unit pattern in Arabic.
     * When result number is null, use plural rule to set the number.
     */
    if (withNumberFormat == false && longestParseDistance != 0) {
        // set the number using plurrual count
        if ( *countOfLongestMatch == PLURAL_COUNT_ZERO ) {
            resultNumber = 0;
        } else if ( *countOfLongestMatch == PLURAL_COUNT_ONE ) {
            resultNumber = 1;
        } else if ( *countOfLongestMatch == PLURAL_COUNT_TWO ) {
            resultNumber = 2;
        } else {
            // should not happen.
            // TODO: how to handle?
            resultNumber = 3;
        }
    }
    if (longestParseDistance == 0) {
        pos.setIndex(oldPos);
        pos.setErrorIndex(0);
    } else {
        UErrorCode status = U_ZERO_ERROR;
        TimeUnitAmount* tmutamt = new TimeUnitAmount(resultNumber, resultTimeUnit, status);
        if (U_SUCCESS(status)) {
            result.adoptObject(tmutamt);
            pos.setIndex(newPos);
            pos.setErrorIndex(-1);
        } else {
            pos.setIndex(oldPos);
            pos.setErrorIndex(0);
        }
    }
}
static jobject parseRBNFImpl(JNIEnv *env, jclass clazz, jint addr, jstring text, 
        jobject position, jboolean lenient) {

    // LOGI("ENTER parseRBNFImpl");

    const char * parsePositionClassName = "java/text/ParsePosition";
    const char * longClassName = "java/lang/Long";
    const char * doubleClassName = "java/lang/Double";


    UErrorCode status = U_ZERO_ERROR;

    UNumberFormat *fmt = (UNumberFormat *)(int)addr;

    jchar *str = (UChar *)env->GetStringChars(text, NULL);
    int strlength = env->GetStringLength(text);

    jclass parsePositionClass = env->FindClass(parsePositionClassName);
    jclass longClass =  env->FindClass(longClassName);
    jclass doubleClass =  env->FindClass(doubleClassName);

    jmethodID getIndexMethodID = env->GetMethodID(parsePositionClass, 
            "getIndex", "()I");
    jmethodID setIndexMethodID = env->GetMethodID(parsePositionClass, 
            "setIndex", "(I)V");
    jmethodID setErrorIndexMethodID = env->GetMethodID(parsePositionClass, 
            "setErrorIndex", "(I)V");

    jmethodID longInitMethodID = env->GetMethodID(longClass, "<init>", "(J)V");
    jmethodID dblInitMethodID = env->GetMethodID(doubleClass, "<init>", "(D)V");

    int parsePos = env->CallIntMethod(position, getIndexMethodID, NULL);

    // make sure the ParsePosition is valid. Actually icu4c would parse a number 
    // correctly even if the parsePosition is set to -1, but since the RI fails 
    // for that case we have to fail too
    if(parsePos < 0 || parsePos > strlength) {
        return NULL;
    }

    Formattable res;

    const UnicodeString src((UChar*)str, strlength, strlength);
    ParsePosition pp;
    
    pp.setIndex(parsePos);
    
    if(lenient) {
        unum_setAttribute(fmt, UNUM_LENIENT_PARSE, JNI_TRUE);
    }
    
    ((const NumberFormat*)fmt)->parse(src, res, pp);

    if(lenient) {
        unum_setAttribute(fmt, UNUM_LENIENT_PARSE, JNI_FALSE);
    }
    
    env->ReleaseStringChars(text, str);

    if(pp.getErrorIndex() == -1) {
        parsePos = pp.getIndex();
    } else {
        env->CallVoidMethod(position, setErrorIndexMethodID, 
                (jint) pp.getErrorIndex());        
        return NULL;
    }

    Formattable::Type numType;
    numType = res.getType();
    UErrorCode fmtStatus;

    double resultDouble;
    long resultLong;
    int64_t resultInt64;

    switch(numType) {
        case Formattable::kDouble:
            resultDouble = res.getDouble();
            env->CallVoidMethod(position, setIndexMethodID, (jint) parsePos);
            return env->NewObject(doubleClass, dblInitMethodID, 
                    (jdouble) resultDouble);
        case Formattable::kLong:
            resultLong = res.getLong();
            env->CallVoidMethod(position, setIndexMethodID, (jint) parsePos);
            return env->NewObject(longClass, longInitMethodID, 
                    (jlong) resultLong);
        case Formattable::kInt64:
            resultInt64 = res.getInt64();
            env->CallVoidMethod(position, setIndexMethodID, (jint) parsePos);
            return env->NewObject(longClass, longInitMethodID, 
                    (jlong) resultInt64);
        default:
            break;
    }

    return NULL;
}
/**
 * Parse date-time objects
 *
 * @param str
 * @param format
 * @param tz
 * @param lenient
 * @param locale
 *
 * @return character vector
 *
 * @version 0.5-1 (Marek Gagolewski, 2015-01-08)
 * @version 0.5-1 (Marek Gagolewski, 2015-01-11) lenient arg added
 * @version 0.5-1 (Marek Gagolewski, 2015-02-22) use tz
 * @version 0.5-1 (Marek Gagolewski, 2015-03-01) set tzone attrib on retval
 */
SEXP stri_datetime_parse(SEXP str, SEXP format, SEXP lenient, SEXP tz, SEXP locale) {
   PROTECT(str = stri_prepare_arg_string(str, "str"));
   const char* locale_val = stri__prepare_arg_locale(locale, "locale", true);
   const char* format_val = stri__prepare_arg_string_1_notNA(format, "format");
   bool lenient_val = stri__prepare_arg_logical_1_notNA(lenient, "lenient");
   if (!isNull(tz)) PROTECT(tz = stri_prepare_arg_string_1(tz, "tz"));
   else             PROTECT(tz); /* needed to set tzone attrib */

   // "format" may be one of:
   const char* format_opts[] = {
      "date_full", "date_long", "date_medium", "date_short",
      "date_relative_full", "date_relative_long", "date_relative_medium", "date_relative_short",
      "time_full", "time_long", "time_medium", "time_short",
      "time_relative_full", "time_relative_long", "time_relative_medium", "time_relative_short",
      "datetime_full", "datetime_long", "datetime_medium", "datetime_short",
      "datetime_relative_full", "datetime_relative_long", "datetime_relative_medium", "datetime_relative_short",
      NULL};
   int format_cur = stri__match_arg(format_val, format_opts);

   TimeZone* tz_val = stri__prepare_arg_timezone(tz, "tz", true/*allowdefault*/);
   Calendar* cal = NULL;
   DateFormat* fmt = NULL;
   STRI__ERROR_HANDLER_BEGIN(2)
   R_len_t vectorize_length = LENGTH(str);
   StriContainerUTF16 str_cont(str, vectorize_length);
   UnicodeString format_str(format_val);

   UErrorCode status = U_ZERO_ERROR;
   if (format_cur >= 0) {
      DateFormat::EStyle style = DateFormat::kNone;
      switch (format_cur % 8) {
         case 0:  style = DateFormat::kFull; break;
         case 1:  style = DateFormat::kLong; break;
         case 2:  style = DateFormat::kMedium; break;
         case 3:  style = DateFormat::kShort; break;
         case 4:  style = DateFormat::kFullRelative; break;
         case 5:  style = DateFormat::kLongRelative; break;
         case 6:  style = DateFormat::kMediumRelative; break;
         case 7:  style = DateFormat::kShortRelative; break;
         default: style = DateFormat::kNone; break;
      }

      /* ICU 54.1: Relative time styles are not currently supported.  */
      switch (format_cur / 8) {
         case 0:
            fmt = DateFormat::createDateInstance(style, Locale::createFromName(locale_val));
            break;

         case 1:
            fmt = DateFormat::createTimeInstance((DateFormat::EStyle)(style & ~DateFormat::kRelative),
               Locale::createFromName(locale_val));
            break;

         case 2:
            fmt = DateFormat::createDateTimeInstance(style,
               (DateFormat::EStyle)(style & ~DateFormat::kRelative),
               Locale::createFromName(locale_val));
            break;

         default:
            fmt = NULL;
            break;

      }
   }
   else
      fmt = new SimpleDateFormat(format_str, Locale::createFromName(locale_val), status);
   STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})

   status = U_ZERO_ERROR;
   cal = Calendar::createInstance(locale_val, status);
   STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})

   cal->adoptTimeZone(tz_val);
   tz_val = NULL; /* The Calendar takes ownership of the TimeZone. */

   cal->setLenient(lenient_val);

   SEXP ret;
   STRI__PROTECT(ret = Rf_allocVector(REALSXP, vectorize_length));
   for (R_len_t i=0; i<vectorize_length; ++i) {
      if (str_cont.isNA(i)) {
         REAL(ret)[i] = NA_REAL;
         continue;
      }

      status = U_ZERO_ERROR;
      ParsePosition pos;
      fmt->parse(str_cont.get(i), *cal, pos);

      if (pos.getErrorIndex() >= 0)
         REAL(ret)[i] = NA_REAL;
      else {
         status = U_ZERO_ERROR;
         REAL(ret)[i] = ((double)cal->getTime(status))/1000.0;
         if (U_FAILURE(status)) REAL(ret)[i] = NA_REAL;
      }
   }


   if (!isNull(tz)) Rf_setAttrib(ret, Rf_ScalarString(Rf_mkChar("tzone")), tz);
   stri__set_class_POSIXct(ret);
   if (tz_val) { delete tz_val; tz_val = NULL; }
   if (fmt) { delete fmt; fmt = NULL; }
   if (cal) { delete cal; cal = NULL; }
   STRI__UNPROTECT_ALL
   return ret;
   STRI__ERROR_HANDLER_END({
      if (tz_val) { delete tz_val; tz_val = NULL; }
      if (fmt) { delete fmt; fmt = NULL; }
      if (cal) { delete cal; cal = NULL; }
   })
}