/**
 * Substitutes vector elements if a pattern occurs in a string
 *
 * @param str character vector
 * @param pattern character vector
 * @param value character vector
 * @return character vector
 *
 * @version 1.0-3 (Marek Gagolewski, 2016-02-03)
 *   FR#124
 *
 * @version 1.0-3 (Marek Gagolewski, 2016-02-03)
 *    FR #216: `negate` arg added
 */
SEXP stri_subset_charclass_replacement(SEXP str, SEXP pattern, SEXP negate, SEXP value)
{
   bool negate_1 = stri__prepare_arg_logical_1_notNA(negate, "negate");
   PROTECT(str = stri_prepare_arg_string(str, "str"));
   PROTECT(pattern = stri_prepare_arg_string_1(pattern, "pattern"));
   PROTECT(value = stri_prepare_arg_string(value, "value"));

   int vectorize_length = LENGTH(str);
   int value_length = LENGTH(value);
   if (value_length == 0)
      Rf_error(MSG__REPLACEMENT_ZERO);

   STRI__ERROR_HANDLER_BEGIN(3)
   StriContainerUTF8 str_cont(str, vectorize_length);
   StriContainerUTF8 value_cont(value, value_length);
   StriContainerCharClass pattern_cont(pattern, vectorize_length);

   SEXP ret;
   STRI__PROTECT(ret = Rf_allocVector(STRSXP, vectorize_length));

   R_len_t k = 0;
   for (R_len_t i = str_cont.vectorize_init();
         i != str_cont.vectorize_end();
         i = str_cont.vectorize_next(i))
   {
      if (str_cont.isNA(i) || pattern_cont.isNA(i)) {
         SET_STRING_ELT(ret, i, NA_STRING);
         continue;
      }

      const UnicodeSet* pattern_cur = &pattern_cont.get(i);
      R_len_t     str_cur_n = str_cont.get(i).length();
      const char* str_cur_s = str_cont.get(i).c_str();

      UChar32 chr = 0;
      bool found = false;
      for (R_len_t j=0; j<str_cur_n; ) {
         U8_NEXT(str_cur_s, j, str_cur_n, chr);
         if (chr < 0) // invalid utf-8 sequence
            throw StriException(MSG__INVALID_UTF8);
         if (pattern_cur->contains(chr)) {
            found = true;
            break;
         }
      }

      if ((found && !negate_1) || (!found && negate_1))
         SET_STRING_ELT(ret, i, value_cont.toR((k++)%value_length));
      else
         SET_STRING_ELT(ret, i, str_cont.toR(i));
   }

   STRI__UNPROTECT_ALL
   return ret;
   STRI__ERROR_HANDLER_END(;/* nothing special to be done on error */)
}
Beispiel #2
0
/**
 * Substring replacement function
 *
 *
 * @param str character vector
 * @param from integer vector (possibly with negative indices)
 * @param to integer vector (possibly with negative indices) or NULL
 * @param length integer vector or NULL
 * @param omit_na logical scalar
 * @param value character vector replacement
 * @return character vector
 *
 * @version 0.1-?? (Bartek Tartanus)
 *
 * @version 0.1-?? (Marek Gagolewski)
 *          use StriContainerUTF8 and stri__UChar32_to_UTF8_index
 *
 * @version 0.1-?? (Marek Gagolewski, 2013-06-01)
 *          use StriContainerUTF8's UChar32-to-UTF8 index
 *
 * @version 0.1-?? (Marek Gagolewski, 2013-06-16)
 *          make StriException-friendly
 *
 * @version 0.2-1 (Marek Gagolewski, 2014-03-20)
 *          Use StriContainerUTF8_indexable
 *
 * @version 0.2-1 (Marek Gagolewski, 2014-04-03)
 *          Use stri__sub_prepare_from_to_length()
 *
 * @version 0.3-1 (Marek Gagolewski, 2014-11-04)
 *    Issue #112: str_prepare_arg* retvals were not PROTECTed from gc
 *
 * @version 0.5-9003 (Marek Gagolewski, 2015-08-05)
 *    Bugfix #183: floating point exception when to or length is an empty vector
 *
 * @version 1.0-2 (Marek Gagolewski, 2016-01-31)
 *    FR #199: new arg: `omit_na`
 *    FR #207: allow insertions
 *
 *
 * @version 1.4.3 (Marek Gagolewski, 2019-03-12)
 *    #346: na_omit for `value`
 */
SEXP stri_sub_replacement(SEXP str, SEXP from, SEXP to, SEXP length, SEXP omit_na, SEXP value)
{
   PROTECT(str   = stri_prepare_arg_string(str, "str"));
   PROTECT(value = stri_prepare_arg_string(value, "value"));
   bool omit_na_1 = stri__prepare_arg_logical_1_notNA(omit_na, "omit_na");

   R_len_t value_len     = LENGTH(value);
   R_len_t str_len       = LENGTH(str);
   R_len_t from_len      = 0; // see below
   R_len_t to_len        = 0; // see below
   R_len_t length_len    = 0; // see below
   int* from_tab         = 0; // see below
   int* to_tab           = 0; // see below
   int* length_tab       = 0; // see below

   R_len_t sub_protected =  2+ /* how many objects to PROTECT on ret? */
      stri__sub_prepare_from_to_length(from, to, length,
      from_len, to_len, length_len, from_tab, to_tab, length_tab);

   R_len_t vectorize_len = stri__recycling_rule(true, 4,
      str_len, value_len, from_len, (to_len>length_len)?to_len:length_len);

   if (vectorize_len <= 0) {
      UNPROTECT(sub_protected);
      return Rf_allocVector(STRSXP, 0);
   }

   STRI__ERROR_HANDLER_BEGIN(sub_protected)
   StriContainerUTF8_indexable str_cont(str, vectorize_len);
   StriContainerUTF8 value_cont(value, vectorize_len);
   SEXP ret;
   STRI__PROTECT(ret = Rf_allocVector(STRSXP, vectorize_len));
   String8buf buf(0); // @TODO: estimate bufsize a priori

   for (R_len_t i = str_cont.vectorize_init();
         i != str_cont.vectorize_end();
         i = str_cont.vectorize_next(i))
   {
      R_len_t cur_from     = from_tab[i % from_len];
      R_len_t cur_to       = (to_tab)?to_tab[i % to_len]:length_tab[i % length_len];

      if (str_cont.isNA(i)) {
          SET_STRING_ELT(ret, i, NA_STRING);
          continue;
      }
      if (cur_from == NA_INTEGER || cur_to == NA_INTEGER || value_cont.isNA(i)) {
         if (omit_na_1) {
            SET_STRING_ELT(ret, i, str_cont.toR(i));
         }
         else {
            SET_STRING_ELT(ret, i, NA_STRING);
         }
         continue;
      }

      if (length_tab) {
         if (cur_to <= 0) {
            // SET_STRING_ELT(ret, i, R_BlankString);
            // continue;
            cur_to = 0;
         }
         else {
            cur_to = cur_from + cur_to - 1;
            if (cur_from < 0 && cur_to >= 0) cur_to = -1;
         }
      }

      const char* str_cur_s   = str_cont.get(i).c_str();
      R_len_t str_cur_n       = str_cont.get(i).length();
      const char* value_cur_s = value_cont.get(i).c_str();
      R_len_t value_cur_n     = value_cont.get(i).length();

      R_len_t cur_from2; // UTF-8 byte indices
      R_len_t cur_to2;   // UTF-8 byte indices

      stri__sub_get_indices(str_cont, i, cur_from, cur_to, cur_from2, cur_to2);
      if (cur_to2 < cur_from2) cur_to2 = cur_from2;

      R_len_t buflen = str_cur_n-(cur_to2-cur_from2)+value_cur_n;
      buf.resize(buflen, false/*destroy contents*/);
      memcpy(buf.data(), str_cur_s, (size_t)cur_from2);
      memcpy(buf.data()+cur_from2, value_cur_s, (size_t)value_cur_n);
      memcpy(buf.data()+cur_from2+value_cur_n, str_cur_s+cur_to2, (size_t)str_cur_n-cur_to2);
      SET_STRING_ELT(ret, i, Rf_mkCharLenCE(buf.data(), buflen, CE_UTF8));
   }

   STRI__UNPROTECT_ALL
   return ret;
   STRI__ERROR_HANDLER_END(;/* nothing special to be done on error */)
}
Beispiel #3
0
/** Date-time artithmetic
 *
 * @param time
 * @param value
 * @param units
 * @param tz
 * @param locale
 *
 * @return POSIXst
 *
 * @version 0.5-1 (Marek Gagolewski, 2014-12-30)
 * @version 0.5-1 (Marek Gagolewski, 2015-03-06) tz arg added
 */
SEXP stri_datetime_add(SEXP time, SEXP value, SEXP units, SEXP tz, SEXP locale) {
   PROTECT(time = stri_prepare_arg_POSIXct(time, "time"));
   PROTECT(value = stri_prepare_arg_integer(value, "value"));
   if (!isNull(tz)) PROTECT(tz = stri_prepare_arg_string_1(tz, "tz"));
   else             PROTECT(tz); /* needed to set tzone attrib */

   R_len_t vectorize_length = stri__recycling_rule(true, 2, LENGTH(time), LENGTH(value));

   const char* units_val = stri__prepare_arg_string_1_notNA(units, "units");
   const char* units_opts[] = {"years", "months", "weeks", "days", "hours", "minutes", "seconds", "milliseconds", NULL};
   int units_cur = stri__match_arg(units_val, units_opts);

   const char* locale_val = stri__prepare_arg_locale(locale, "locale", true);

   TimeZone* tz_val = stri__prepare_arg_timezone(tz, "tz", true/*allowdefault*/);

   Calendar* cal = NULL;
   STRI__ERROR_HANDLER_BEGIN(3)
   StriContainerDouble time_cont(time, vectorize_length);
   StriContainerInteger value_cont(value, vectorize_length);

   UCalendarDateFields units_field;
   switch (units_cur) {
      case 0: units_field = UCAL_YEAR;                   break;
      case 1: units_field = UCAL_MONTH;                  break;
      case 2: units_field = UCAL_WEEK_OF_YEAR;           break;
      case 3: units_field = UCAL_DAY_OF_MONTH;           break;
      case 4: units_field = UCAL_HOUR_OF_DAY;            break;
      case 5: units_field = UCAL_MINUTE;                 break;
      case 6: units_field = UCAL_SECOND;                 break;
      case 7: units_field = UCAL_MILLISECOND;            break;
      default: throw StriException(MSG__INCORRECT_MATCH_OPTION, "units");
   }

   UErrorCode status = U_ZERO_ERROR;
   cal = Calendar::createInstance(locale_val, status);
   STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})

   cal->adoptTimeZone(tz_val);
   tz_val = NULL; /* The Calendar takes ownership of the TimeZone. */

   SEXP ret;
   STRI__PROTECT(ret = Rf_allocVector(REALSXP, vectorize_length));
   double* ret_val = REAL(ret);
   for (R_len_t i=0; i<vectorize_length; ++i) {
      if (time_cont.isNA(i) || value_cont.isNA(i)) {
         ret_val[i] = NA_REAL;
         continue;
      }
      status = U_ZERO_ERROR;
      cal->setTime((UDate)(time_cont.get(i)*1000.0), status);
      STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})

      status = U_ZERO_ERROR;
      cal->add(units_field, value_cont.get(i), status);
      STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})

      status = U_ZERO_ERROR;
      ret_val[i] = ((double)cal->getTime(status))/1000.0;
      STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})
   }

   if (!isNull(tz)) Rf_setAttrib(ret, Rf_ScalarString(Rf_mkChar("tzone")), tz);
   stri__set_class_POSIXct(ret);
   if (tz_val) { delete tz_val; tz_val = NULL; }
   if (cal) { delete cal; cal = NULL; }
   STRI__UNPROTECT_ALL
   return ret;
   STRI__ERROR_HANDLER_END({
      if (tz_val) { delete tz_val; tz_val = NULL; }
      if (cal) { delete cal; cal = NULL; }
   })
}