/**
 * Compare elements in 2 character vectors, with collation
 *
 * Both inputs are recycled to a common length and compared element-wise
 * with ucol_strcollUTF8(); the collation result (cast to int) is tested
 * for equality against type[0] and optionally negated.
 *
 * @param e1 character vector
 * @param e2 character vector
 * @param opts_collator passed to stri__ucol_open()
 * @param type [internal] integer vector of length 2,
 *    type[0]: 0 for ==, -1 for < and 1 for >,
 *    type[1]: 0 or 1 (whether to negate the results)
 *
 * @return logical vector; NA wherever the corresponding element
 *    of e1 or e2 is NA
 *
 * @version 0.2-1 (Marek Gagolewski, 2014-03-19)
 *
 * @version 0.2-3 (Marek Gagolewski, 2014-05-07)
 *    opts_collator == NA no longer allowed
 *
 * @version 0.3-1 (Marek Gagolewski, 2014-11-04)
 *    Issue #112: str_prepare_arg* retvals were not PROTECTed from gc
 */
SEXP stri_cmp_logical(SEXP e1, SEXP e2, SEXP opts_collator, SEXP type)
{
   // we'll perform a collator-based cmp
   // type is an internal arg, check manually, error() allowed here
   // (nothing has been PROTECTed or opened yet, so Rf_error leaks nothing)
   if (!Rf_isInteger(type) || LENGTH(type) != 2)
      Rf_error(MSG__INCORRECT_INTERNAL_ARG);

   int _type   = INTEGER(type)[0];
   int _negate = INTEGER(type)[1];
   if (_type > 1 || _type < -1 || _negate < 0 || _negate > 1)
      Rf_error(MSG__INCORRECT_INTERNAL_ARG);

   PROTECT(e1 = stri_prepare_arg_string(e1, "e1")); // prepare string argument
   PROTECT(e2 = stri_prepare_arg_string(e2, "e2")); // prepare string argument

   // call stri__ucol_open after prepare_arg:
   // if prepare_arg had failed, we would have a mem leak
   UCollator* col = NULL;
   col = stri__ucol_open(opts_collator);

   STRI__ERROR_HANDLER_BEGIN(2)
   R_len_t vectorize_length = stri__recycling_rule(true, 2, LENGTH(e1), LENGTH(e2));

   StriContainerUTF8 e1_cont(e1, vectorize_length);
   StriContainerUTF8 e2_cont(e2, vectorize_length);

   SEXP ret;
   STRI__PROTECT(ret = Rf_allocVector(LGLSXP, vectorize_length));
   int* ret_tab = LOGICAL(ret);

   for (R_len_t i = 0; i < vectorize_length; ++i) {
      // a missing value on either side yields a missing result
      if (e1_cont.isNA(i) || e2_cont.isNA(i)) {
         ret_tab[i] = NA_LOGICAL;
         continue;
      }

      R_len_t     cur1_n = e1_cont.get(i).length();
      const char* cur1_s = e1_cont.get(i).c_str();
      R_len_t     cur2_n = e2_cont.get(i).length();
      const char* cur2_s = e2_cont.get(i).c_str();

      // with collation
      UErrorCode status = U_ZERO_ERROR;
      ret_tab[i] = (_type == (int)ucol_strcollUTF8(col,
         cur1_s, cur1_n, cur2_s, cur2_n, &status));
      STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})

      if (_negate)
         ret_tab[i] = !ret_tab[i];
   }

   // success path: release the collator before returning;
   // the cleanup passed to the error handler below covers the failure path
   if (col) {
      ucol_close(col);
      col = NULL;
   }

   STRI__UNPROTECT_ALL
   return ret;

   STRI__ERROR_HANDLER_END(
      if (col) ucol_close(col);
   )
}
/**
 * Count pattern occurrences in a string [with collation]
 *
 * str and pattern are recycled to a common length; for every pair the
 * number of matches reported by the ICU string search is stored.
 *
 * @param str character vector
 * @param pattern character vector
 * @param opts_collator passed to stri__ucol_open()
 * @return integer vector
 *
 * @version 0.1-?? (Marek Gagolewski)
 *
 * @version 0.1-?? (Marek Gagolewski)
 *          corrected behavior on empty str/pattern
 *
 * @version 0.1-?? (Marek Gagolewski, 2013-06-23)
 *          make StriException-friendly,
 *          use StriContainerUStringSearch
 *
 * @version 0.2-3 (Marek Gagolewski, 2014-05-08)
 *          new fun: stri_count_coll (opts_collator == NA not allowed)
 *
 * @version 0.3-1 (Marek Gagolewski, 2014-11-04)
 *    Issue #112: str_prepare_arg* retvals were not PROTECTed from gc
 */
SEXP stri_count_coll(SEXP str, SEXP pattern, SEXP opts_collator)
{
   PROTECT(str = stri_prepare_arg_string(str, "str"));
   PROTECT(pattern = stri_prepare_arg_string(pattern, "pattern"));

   // the collator is opened only once both arguments are prepared:
   // had prepare_arg failed afterwards, the collator would have leaked
   UCollator* col = stri__ucol_open(opts_collator);

   STRI__ERROR_HANDLER_BEGIN(2)
   R_len_t nmax = stri__recycling_rule(true, 2, LENGTH(str), LENGTH(pattern));

   StriContainerUTF16 haystacks(str, nmax);
   // the container borrows the collator; it does not take ownership of it
   StriContainerUStringSearch needles(pattern, nmax, col);

   SEXP ret;
   STRI__PROTECT(ret = Rf_allocVector(INTSXP, nmax));
   int* counts = INTEGER(ret);

   // NOTE(review): the index is deliberately still called `i`, in case the
   // STRI__CONTINUE_ON_EMPTY_OR_NA_STR_PATTERN macro refers to it - confirm
   for (R_len_t i = needles.vectorize_init();
         i != needles.vectorize_end();
         i = needles.vectorize_next(i))
   {
      STRI__CONTINUE_ON_EMPTY_OR_NA_STR_PATTERN(haystacks, needles,
         counts[i] = NA_INTEGER,
         counts[i] = 0)

      UStringSearch* searcher = needles.getMatcher(i, haystacks.get(i));
      usearch_reset(searcher);

      UErrorCode status = U_ZERO_ERROR;
      R_len_t hits = 0;
      for (;;) {
         // stop as soon as ICU reports an error or runs out of matches
         if (U_FAILURE(status))
            break;
         if ((int)usearch_next(searcher, &status) == USEARCH_DONE)
            break;
         ++hits;
      }
      STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})

      counts[i] = hits;
   }

   if (col != NULL) {
      ucol_close(col);
      col = NULL;
   }

   STRI__UNPROTECT_ALL
   return ret;

   STRI__ERROR_HANDLER_END(
      if (col != NULL) ucol_close(col);
   )
}
/**
 * Count pattern occurrences in a string [with collation]
 *
 * str and pattern are recycled to a common length; for every pair the
 * number of matches reported by the ICU string search is stored.
 *
 * @param str character vector
 * @param pattern character vector
 * @param collator_opts passed to stri__ucol_open(),
 *    if \code{NA}, then \code{stri__count_fixed_byte} is called
 *    (i.e. whenever stri__ucol_open() yields no collator)
 * @return integer vector
 *
 * @version 0.1 (Marek Gagolewski)
 * @version 0.2 (Marek Gagolewski) - corrected behavior on empty str/pattern
 * @version 0.3 (Marek Gagolewski, 2013-06-23) make StriException-friendly,
 *    use StriContainerUStringSearch
 */
SEXP stri_count_fixed(SEXP str, SEXP pattern, SEXP collator_opts)
{
   // the prepared arguments may be freshly allocated objects, so they must
   // be PROTECTed before anything below allocates again
   PROTECT(str = stri_prepare_arg_string(str, "str"));
   PROTECT(pattern = stri_prepare_arg_string(pattern, "pattern"));

   // call stri__ucol_open after prepare_arg:
   // if prepare_arg had failed, we would have a mem leak
   UCollator* collator = stri__ucol_open(collator_opts);
   if (!collator) {
      // no collator: fall back to the byte-wise implementation
      SEXP ret_byte = stri__count_fixed_byte(str, pattern);
      UNPROTECT(2); // str, pattern
      return ret_byte;
   }

   STRI__ERROR_HANDLER_BEGIN(2)
   R_len_t vectorize_length = stri__recycling_rule(true, 2, LENGTH(str), LENGTH(pattern));

   StriContainerUTF16 str_cont(str, vectorize_length);
   StriContainerUStringSearch pattern_cont(pattern, vectorize_length, collator); // collator is not owned by pattern_cont

   SEXP ret;
   STRI__PROTECT(ret = Rf_allocVector(INTSXP, vectorize_length));
   int* ret_tab = INTEGER(ret);

   for (R_len_t i = pattern_cont.vectorize_init();
         i != pattern_cont.vectorize_end();
         i = pattern_cont.vectorize_next(i))
   {
      STRI__CONTINUE_ON_EMPTY_OR_NA_STR_PATTERN(str_cont, pattern_cont,
         ret_tab[i] = NA_INTEGER,
         ret_tab[i] = 0)

      UStringSearch* matcher = pattern_cont.getMatcher(i, str_cont.get(i));
      usearch_reset(matcher);

      UErrorCode status = U_ZERO_ERROR;
      R_len_t found = 0;
      while (!U_FAILURE(status) && ((int)usearch_next(matcher, &status) != USEARCH_DONE))
         ++found;
      if (U_FAILURE(status))
         throw StriException(status);

      ret_tab[i] = found;
   }

   // success path: release the collator before returning;
   // the cleanup passed to the error handler below covers the failure path
   if (collator) {
      ucol_close(collator);
      collator = NULL;
   }

   STRI__UNPROTECT_ALL
   return ret;

   STRI__ERROR_HANDLER_END(
      if (collator) ucol_close(collator);
   )
}