/** * Substitutes vector elements if a pattern occurs in a string * * @param str character vector * @param pattern character vector * @param value character vector * @return character vector * * @version 1.0-3 (Marek Gagolewski, 2016-02-03) * FR#124 * * @version 1.0-3 (Marek Gagolewski, 2016-02-03) * FR #216: `negate` arg added */ SEXP stri_subset_charclass_replacement(SEXP str, SEXP pattern, SEXP negate, SEXP value) { bool negate_1 = stri__prepare_arg_logical_1_notNA(negate, "negate"); PROTECT(str = stri_prepare_arg_string(str, "str")); PROTECT(pattern = stri_prepare_arg_string_1(pattern, "pattern")); PROTECT(value = stri_prepare_arg_string(value, "value")); int vectorize_length = LENGTH(str); int value_length = LENGTH(value); if (value_length == 0) Rf_error(MSG__REPLACEMENT_ZERO); STRI__ERROR_HANDLER_BEGIN(3) StriContainerUTF8 str_cont(str, vectorize_length); StriContainerUTF8 value_cont(value, value_length); StriContainerCharClass pattern_cont(pattern, vectorize_length); SEXP ret; STRI__PROTECT(ret = Rf_allocVector(STRSXP, vectorize_length)); R_len_t k = 0; for (R_len_t i = str_cont.vectorize_init(); i != str_cont.vectorize_end(); i = str_cont.vectorize_next(i)) { if (str_cont.isNA(i) || pattern_cont.isNA(i)) { SET_STRING_ELT(ret, i, NA_STRING); continue; } const UnicodeSet* pattern_cur = &pattern_cont.get(i); R_len_t str_cur_n = str_cont.get(i).length(); const char* str_cur_s = str_cont.get(i).c_str(); UChar32 chr = 0; bool found = false; for (R_len_t j=0; j<str_cur_n; ) { U8_NEXT(str_cur_s, j, str_cur_n, chr); if (chr < 0) // invalid utf-8 sequence throw StriException(MSG__INVALID_UTF8); if (pattern_cur->contains(chr)) { found = true; break; } } if ((found && !negate_1) || (!found && negate_1)) SET_STRING_ELT(ret, i, value_cont.toR((k++)%value_length)); else SET_STRING_ELT(ret, i, str_cont.toR(i)); } STRI__UNPROTECT_ALL return ret; STRI__ERROR_HANDLER_END(;/* nothing special to be done on error */) }
/** * Substring replacement function * * * @param str character vector * @param from integer vector (possibly with negative indices) * @param to integer vector (possibly with negative indices) or NULL * @param length integer vector or NULL * @param omit_na logical scalar * @param value character vector replacement * @return character vector * * @version 0.1-?? (Bartek Tartanus) * * @version 0.1-?? (Marek Gagolewski) * use StriContainerUTF8 and stri__UChar32_to_UTF8_index * * @version 0.1-?? (Marek Gagolewski, 2013-06-01) * use StriContainerUTF8's UChar32-to-UTF8 index * * @version 0.1-?? (Marek Gagolewski, 2013-06-16) * make StriException-friendly * * @version 0.2-1 (Marek Gagolewski, 2014-03-20) * Use StriContainerUTF8_indexable * * @version 0.2-1 (Marek Gagolewski, 2014-04-03) * Use stri__sub_prepare_from_to_length() * * @version 0.3-1 (Marek Gagolewski, 2014-11-04) * Issue #112: str_prepare_arg* retvals were not PROTECTed from gc * * @version 0.5-9003 (Marek Gagolewski, 2015-08-05) * Bugfix #183: floating point exception when to or length is an empty vector * * @version 1.0-2 (Marek Gagolewski, 2016-01-31) * FR #199: new arg: `omit_na` * FR #207: allow insertions * * * @version 1.4.3 (Marek Gagolewski, 2019-03-12) * #346: na_omit for `value` */ SEXP stri_sub_replacement(SEXP str, SEXP from, SEXP to, SEXP length, SEXP omit_na, SEXP value) { PROTECT(str = stri_prepare_arg_string(str, "str")); PROTECT(value = stri_prepare_arg_string(value, "value")); bool omit_na_1 = stri__prepare_arg_logical_1_notNA(omit_na, "omit_na"); R_len_t value_len = LENGTH(value); R_len_t str_len = LENGTH(str); R_len_t from_len = 0; // see below R_len_t to_len = 0; // see below R_len_t length_len = 0; // see below int* from_tab = 0; // see below int* to_tab = 0; // see below int* length_tab = 0; // see below R_len_t sub_protected = 2+ /* how many objects to PROTECT on ret? */ stri__sub_prepare_from_to_length(from, to, length, from_len, to_len, length_len, from_tab, to_tab, length_tab); R_len_t vectorize_len = stri__recycling_rule(true, 4, str_len, value_len, from_len, (to_len>length_len)?to_len:length_len); if (vectorize_len <= 0) { UNPROTECT(sub_protected); return Rf_allocVector(STRSXP, 0); } STRI__ERROR_HANDLER_BEGIN(sub_protected) StriContainerUTF8_indexable str_cont(str, vectorize_len); StriContainerUTF8 value_cont(value, vectorize_len); SEXP ret; STRI__PROTECT(ret = Rf_allocVector(STRSXP, vectorize_len)); String8buf buf(0); // @TODO: estimate bufsize a priori for (R_len_t i = str_cont.vectorize_init(); i != str_cont.vectorize_end(); i = str_cont.vectorize_next(i)) { R_len_t cur_from = from_tab[i % from_len]; R_len_t cur_to = (to_tab)?to_tab[i % to_len]:length_tab[i % length_len]; if (str_cont.isNA(i)) { SET_STRING_ELT(ret, i, NA_STRING); continue; } if (cur_from == NA_INTEGER || cur_to == NA_INTEGER || value_cont.isNA(i)) { if (omit_na_1) { SET_STRING_ELT(ret, i, str_cont.toR(i)); } else { SET_STRING_ELT(ret, i, NA_STRING); } continue; } if (length_tab) { if (cur_to <= 0) { // SET_STRING_ELT(ret, i, R_BlankString); // continue; cur_to = 0; } else { cur_to = cur_from + cur_to - 1; if (cur_from < 0 && cur_to >= 0) cur_to = -1; } } const char* str_cur_s = str_cont.get(i).c_str(); R_len_t str_cur_n = str_cont.get(i).length(); const char* value_cur_s = value_cont.get(i).c_str(); R_len_t value_cur_n = value_cont.get(i).length(); R_len_t cur_from2; // UTF-8 byte indices R_len_t cur_to2; // UTF-8 byte indices stri__sub_get_indices(str_cont, i, cur_from, cur_to, cur_from2, cur_to2); if (cur_to2 < cur_from2) cur_to2 = cur_from2; R_len_t buflen = str_cur_n-(cur_to2-cur_from2)+value_cur_n; buf.resize(buflen, false/*destroy contents*/); memcpy(buf.data(), str_cur_s, (size_t)cur_from2); memcpy(buf.data()+cur_from2, value_cur_s, (size_t)value_cur_n); memcpy(buf.data()+cur_from2+value_cur_n, str_cur_s+cur_to2, (size_t)str_cur_n-cur_to2); SET_STRING_ELT(ret, i, Rf_mkCharLenCE(buf.data(), buflen, CE_UTF8)); } STRI__UNPROTECT_ALL return ret; STRI__ERROR_HANDLER_END(;/* nothing special to be done on error */) }
/** Date-time artithmetic * * @param time * @param value * @param units * @param tz * @param locale * * @return POSIXst * * @version 0.5-1 (Marek Gagolewski, 2014-12-30) * @version 0.5-1 (Marek Gagolewski, 2015-03-06) tz arg added */ SEXP stri_datetime_add(SEXP time, SEXP value, SEXP units, SEXP tz, SEXP locale) { PROTECT(time = stri_prepare_arg_POSIXct(time, "time")); PROTECT(value = stri_prepare_arg_integer(value, "value")); if (!isNull(tz)) PROTECT(tz = stri_prepare_arg_string_1(tz, "tz")); else PROTECT(tz); /* needed to set tzone attrib */ R_len_t vectorize_length = stri__recycling_rule(true, 2, LENGTH(time), LENGTH(value)); const char* units_val = stri__prepare_arg_string_1_notNA(units, "units"); const char* units_opts[] = {"years", "months", "weeks", "days", "hours", "minutes", "seconds", "milliseconds", NULL}; int units_cur = stri__match_arg(units_val, units_opts); const char* locale_val = stri__prepare_arg_locale(locale, "locale", true); TimeZone* tz_val = stri__prepare_arg_timezone(tz, "tz", true/*allowdefault*/); Calendar* cal = NULL; STRI__ERROR_HANDLER_BEGIN(3) StriContainerDouble time_cont(time, vectorize_length); StriContainerInteger value_cont(value, vectorize_length); UCalendarDateFields units_field; switch (units_cur) { case 0: units_field = UCAL_YEAR; break; case 1: units_field = UCAL_MONTH; break; case 2: units_field = UCAL_WEEK_OF_YEAR; break; case 3: units_field = UCAL_DAY_OF_MONTH; break; case 4: units_field = UCAL_HOUR_OF_DAY; break; case 5: units_field = UCAL_MINUTE; break; case 6: units_field = UCAL_SECOND; break; case 7: units_field = UCAL_MILLISECOND; break; default: throw StriException(MSG__INCORRECT_MATCH_OPTION, "units"); } UErrorCode status = U_ZERO_ERROR; cal = Calendar::createInstance(locale_val, status); STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */}) cal->adoptTimeZone(tz_val); tz_val = NULL; /* The Calendar takes ownership of the TimeZone. */ SEXP ret; STRI__PROTECT(ret = Rf_allocVector(REALSXP, vectorize_length)); double* ret_val = REAL(ret); for (R_len_t i=0; i<vectorize_length; ++i) { if (time_cont.isNA(i) || value_cont.isNA(i)) { ret_val[i] = NA_REAL; continue; } status = U_ZERO_ERROR; cal->setTime((UDate)(time_cont.get(i)*1000.0), status); STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */}) status = U_ZERO_ERROR; cal->add(units_field, value_cont.get(i), status); STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */}) status = U_ZERO_ERROR; ret_val[i] = ((double)cal->getTime(status))/1000.0; STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */}) } if (!isNull(tz)) Rf_setAttrib(ret, Rf_ScalarString(Rf_mkChar("tzone")), tz); stri__set_class_POSIXct(ret); if (tz_val) { delete tz_val; tz_val = NULL; } if (cal) { delete cal; cal = NULL; } STRI__UNPROTECT_ALL return ret; STRI__ERROR_HANDLER_END({ if (tz_val) { delete tz_val; tz_val = NULL; } if (cal) { delete cal; cal = NULL; } }) }