* * @version 1.3.1 (Marek Gagolewski, 2019-02-06) * new retval field: ICU.UTF8 */ SEXP stri_info() { STRI__ERROR_HANDLER_BEGIN(0) const R_len_t infosize = 7; SEXP vals; STRI__PROTECT(vals = Rf_allocVector(VECSXP, infosize)); SET_VECTOR_ELT(vals, 0, Rf_mkString(U_UNICODE_VERSION)); SET_VECTOR_ELT(vals, 1, Rf_mkString(U_ICU_VERSION)); SET_VECTOR_ELT(vals, 2, stri_locale_info(R_NilValue)); // may call Rf_error SET_VECTOR_ELT(vals, 3, stri__make_character_vector_char_ptr(2, "UTF-8", "UTF-16")); // fixed strings SET_VECTOR_ELT(vals, 4, stri_enc_info(R_NilValue)); // may call Rf_error SET_VECTOR_ELT(vals, 5, Rf_ScalarLogical(STRI_ICU_FOUND)); SET_VECTOR_ELT(vals, 6, Rf_ScalarLogical(0)); #ifdef U_CHARSET_IS_UTF8 #if U_CHARSET_IS_UTF8 SET_VECTOR_ELT(vals, 6, Rf_ScalarLogical(1)); #endif #endif stri__set_names(vals, infosize, "Unicode.version", "ICU.version", "Locale", "Charset.internal", "Charset.native", "ICU.system", "ICU.UTF8"); STRI__UNPROTECT_ALL
/**
 * Describe a single character encoding (converter)
 *
 * The result is a named list laid out as follows:
 * "Name.friendly", "Name.ICU", then one "Name.<standard>" slot for each
 * entry returned by StriUcnv::getStandards(), and finally "ASCII.subset",
 * "Unicode.1to1", "CharSize.8bit", "CharSize.min" and "CharSize.max".
 *
 * When the canonical converter name cannot be obtained, a warning is
 * issued, "Name.ICU" is set to NA and no other slot is filled in.
 * "Unicode.1to1" is reported as NA unless both the minimal and the
 * maximal character size are equal to 1.
 *
 * @param enc either NULL or "" for default encoding,
 *    or one string with encoding name
 * @return R list object with many components (see R doc for details)
 *
 * @version 0.1-?? (Marek Gagolewski)
 *
 * @version 0.2-1 (Marek Gagolewski)
 *          use StriUcnv; make StriException-friendly
 *
 * @version 0.3-1 (Marek Gagolewski, 2014-11-04)
 *    Issue #112: str_prepare_arg* retvals were not PROTECTed from gc
 */
SEXP stri_enc_info(SEXP enc)
{
    /* this is R_alloc'ed */
    const char* enc_name = stri__prepare_arg_enc(enc, "enc", true/*default ok*/);

    STRI__ERROR_HANDLER_BEGIN(0)
    StriUcnv converter(enc_name);
    //converter.setCallBackSubstitute(); // restore default callbacks (no warning)
    UConverter* ucnv_handle = converter.getConverter(false);

    // the standards for which alternative names are looked up
    vector<const char*> std_list = StriUcnv::getStandards();
    const R_len_t n_std = static_cast<R_len_t>(std_list.size());

    // positions of the individual components in the output list:
    // 2 leading name slots, n_std standard-name slots, 5 trailing slots
    const int n_fields     = n_std + 2 + 5;
    const int at_friendly  = 0;
    const int at_icu       = 1;
    const int at_std_first = 2;
    const int at_ascii     = n_fields - 5;
    const int at_1to1      = n_fields - 4;
    const int at_8bit      = n_fields - 3;
    const int at_min       = n_fields - 2;
    const int at_max       = n_fields - 1;

    SEXP result;
    SEXP field_names;

    // build the names attribute first
    STRI__PROTECT(field_names = Rf_allocVector(STRSXP, n_fields));
    SET_STRING_ELT(field_names, at_friendly, Rf_mkChar("Name.friendly"));
    SET_STRING_ELT(field_names, at_icu,      Rf_mkChar("Name.ICU"));
    for (R_len_t k = 0; k < n_std; ++k) {
        if (!std_list[k])
            continue; // no label for a missing standard name
        string label("Name.");
        label += std_list[k];
        SET_STRING_ELT(field_names, at_std_first + k, Rf_mkChar(label.c_str()));
    }
    SET_STRING_ELT(field_names, at_ascii, Rf_mkChar("ASCII.subset"));
    SET_STRING_ELT(field_names, at_1to1,  Rf_mkChar("Unicode.1to1"));
    SET_STRING_ELT(field_names, at_8bit,  Rf_mkChar("CharSize.8bit"));
    SET_STRING_ELT(field_names, at_min,   Rf_mkChar("CharSize.min"));
    SET_STRING_ELT(field_names, at_max,   Rf_mkChar("CharSize.max"));

    // now the list of values itself
    STRI__PROTECT(result = Rf_allocVector(VECSXP, n_fields));

    // canonical (ICU) converter name
    UErrorCode status = U_ZERO_ERROR;
    const char* icu_name = ucnv_getName(ucnv_handle, &status);

    if (!U_FAILURE(status) && icu_name) {
        SET_VECTOR_ELT(result, at_icu,
            stri__make_character_vector_char_ptr(1, icu_name));

        // friendly name (NA if there is none)
        const char* friendly = StriUcnv::getFriendlyName(icu_name);
        SET_VECTOR_ELT(result, at_friendly,
            friendly ? stri__make_character_vector_char_ptr(1, friendly)
                     : Rf_ScalarString(NA_STRING));

        // is ASCII a subset of this encoding?
        SET_VECTOR_ELT(result, at_ascii,
            Rf_ScalarLogical(static_cast<int>(converter.hasASCIIsubset())));

        // character size range; "8bit" means min and max are both 1
        const int min_size = static_cast<int>(ucnv_getMinCharSize(ucnv_handle));
        const int max_size = static_cast<int>(ucnv_getMaxCharSize(ucnv_handle));
        const int is_8bit  = (min_size == 1 && max_size == 1);
        SET_VECTOR_ELT(result, at_8bit, Rf_ScalarLogical(is_8bit));
        SET_VECTOR_ELT(result, at_min,  Rf_ScalarInteger(min_size));
        SET_VECTOR_ELT(result, at_max,  Rf_ScalarInteger(max_size));

        // the 1-to-1 correspondence with Unicode is queried
        // for 8-bit encodings only; NA otherwise
        const int one_to_one = is_8bit
            ? static_cast<int>(converter.is1to1Unicode())
            : NA_LOGICAL;
        SET_VECTOR_ELT(result, at_1to1, Rf_ScalarLogical(one_to_one));

        // names of this converter according to each standard
        for (R_len_t k = 0; k < n_std; ++k) {
            if (std_list[k]) {
                status = U_ZERO_ERROR;
                const char* alias =
                    ucnv_getStandardName(icu_name, std_list[k], &status);
                const bool alias_ok = (!U_FAILURE(status) && alias);
                SET_VECTOR_ELT(result, at_std_first + k,
                    alias_ok ? stri__make_character_vector_char_ptr(1, alias)
                             : Rf_ScalarString(NA_STRING));
            }
        }
    }
    else {
        // canonical name unavailable: report NA and warn
        SET_VECTOR_ELT(result, at_icu, Rf_ScalarString(NA_STRING));
        Rf_warning(MSG__ENC_ERROR_GETNAME);
    }

    Rf_setAttrib(result, R_NamesSymbol, field_names);
    STRI__UNPROTECT_ALL
    return result;
    STRI__ERROR_HANDLER_END({/* no special action on error */})
}