/**
 * Convert a string from one character set to another.
 *
 * GSM default alphabet in 7-bit packed form is not understood by the
 * underlying converters, so packed input is first expanded to one septet per
 * octet, and packed output is produced by packing the converted octets in
 * place as the very last step.
 *
 * @param from_charset   Character set of from_string_p.
 * @param from_length    Length of from_string_p.
 * @param from_string_p  String to convert.
 * @param spare_bits     Forwarded to str_convert_septet_to_octet() when the
 *                       input is 7-bit packed; unused otherwise.
 * @param to_charset     Character set to convert to.
 * @param to_length_p    In: upper limit for the output length.
 *                       Out: clamped to the length actually produced.
 * @param to_string_p    Output buffer. On the copy paths a terminating '\0'
 *                       is stored at index *to_length_p, so the buffer needs
 *                       room for one byte more than *to_length_p.
 *
 * @return from_length when both character sets are equal, otherwise the
 *         converted length, or a negative value on failure.
 *
 * NOTE(review): working buffers come from alloca() and scale with the input
 * length; very long inputs can exhaust the stack.
 */
int str_convert_string(string_encoding_t from_charset, size_t from_length, uint8_t *from_string_p, size_t spare_bits,
                       string_encoding_t to_charset, size_t *to_length_p, uint8_t *to_string_p)
{
    CONVERTER_HANDLE from_conv;
    CONVERTER_HANDLE to_conv;
    bool from_7_bit = false;
    bool to_7_bit = false;
    size_t length;
    uint8_t *string_p;
    int converted = 0;
    int result;

    /* size_t values are narrowed explicitly so that they match the %d conversions */
    CN_LOG_D("from_charset=%d, from_length=%d, to_charset=%d, *to_length_p=%d",
             from_charset, (int)from_length, to_charset, (int)*to_length_p);

    if (from_charset == to_charset) {
        /* No conversion, adjust/copy length and strings, and return */
        if (*to_length_p > from_length) {
            *to_length_p = from_length;
        }

        memcpy(to_string_p, from_string_p, *to_length_p);
        to_string_p[*to_length_p] = '\0';
        return (int)from_length;
    }

    /* GSM Default alphabet (7-bit packed) is not supported by the converters.
     * These strings must first be massaged from an array of septets to octets... */
    if (CHARSET_GSM_DEFAULT_7_BIT == from_charset) {
        from_charset = CHARSET_GSM_DEFAULT_8_BIT;
        from_7_bit = true;
    }

    /* or from an array of octets to septets. */
    if (CHARSET_GSM_DEFAULT_7_BIT == to_charset) {
        to_charset = CHARSET_GSM_DEFAULT_8_BIT;
        to_7_bit = true;
    }

    /* (Al)Locate converter(s) */
    result = str_convert_get_converter(from_charset, &from_conv, to_charset, &to_conv);

    if (result < 0) {
        CN_LOG_E("Unable to convert from character set %d to %d!", from_charset, to_charset);
        return result;
    }

    /* Run zeroth conversion: unpack septets, which needs 8 octets per 7 input octets */
    if (from_7_bit) {
        length = from_length + from_length / 7;
        string_p = (uint8_t *)alloca(length + 1);
        length = str_convert_septet_to_octet(from_length, from_string_p, spare_bits, length, string_p);
    } else {
        length = from_length;
        string_p = from_string_p;
    }

    /* Run first conversion */
#ifdef USE_ICONV
    int errnum;
    char *inbuf_p = (char *)string_p;
    size_t inbytesleft = (size_t)length;
    char *outbuf_p;
    char *outstr_p;
    size_t outbytesleft, outstr_length;
    size_t res;

    /* Allocate memory to hold result string and call ICONV to perform the conversion.
     * Assume worst case, 1 input character becomes 4 output characters. */
    outstr_length = outbytesleft = (length + 1) * sizeof(uint32_t);
    outstr_p = outbuf_p = (char *)alloca(outbytesleft);

    res = iconv(from_conv, &inbuf_p, &inbytesleft, &outbuf_p, &outbytesleft);

    /* Check for errors */
    if (res == (size_t) - 1) {
        errnum = errno;
        CN_LOG_E("iconv error %d (%s) !", errnum, strerror(errnum));
        return -1;
    }

    /* Find out how many bytes iconv actually used for the converted string */
    outstr_length -= outbytesleft;
#elif USE_UCNV
    UChar *ustr_p;
    int32_t ustr_length;
    UErrorCode errcode = U_ZERO_ERROR;
    int32_t res;

    if (str_convert_is_utf_16(from_charset)) {
        /* From string is already UTF-16 encoded. Skip this conversion step.
         * NOTE(review): length is used as the UChar count here; if from_length
         * is a byte count this is twice the real number of code units - confirm. */
        ustr_length = length;
        ustr_p = (UChar *)string_p;
    } else {
        /* Allocate memory to hold result string and call ICU to perform the conversion */
        ustr_length = length + 1;
        ustr_p = (UChar *)alloca((size_t)(ustr_length * sizeof(UChar)));

        res = ucnv_toUChars(from_conv, ustr_p, ustr_length, (const char *)string_p, length, &errcode);

        if (res < 0) {
            CN_LOG_E("ucnv_toUChars res %d!", res);
            return -1;
        }

        /* Check for errors, ignore warnings */
        if (U_ZERO_ERROR != errcode && errcode > U_ERROR_WARNING_LIMIT) {
            CN_LOG_E("ucnv_toUChars error code %d!", errcode);
            return -1;
        }

#ifdef CN_DEBUG_ENABLED
        {
            /* At most 30 code units, each printed as up to 4 hex digits plus a space. */
            char datastr[30 * 5 + 1] = { '\0' };
            size_t used = 0;
            int i;

            for (i = 0; i < 30 && i < res; i++) {
                int n = snprintf(datastr + used, sizeof(datastr) - used, "%02X ", (unsigned int)ustr_p[i]);

                if (n < 0 || (size_t)n >= sizeof(datastr) - used) {
                    break;
                }

                used += (size_t)n;
            }

            CN_LOG_D("ucnv_toUChars(conv=%p, charset=%d, ustr=\"%s\") = %d",
                     (void *)from_conv, from_charset, datastr, res);
        }
#endif /* CN_DEBUG_ENABLED */

        ustr_length = res;
    }

#endif /* USE_UCNV */

    /* Run second conversion, unless it is the same converter as we ran before */
    if (to_conv == from_conv) {
        /* One step conversion, copy result to output parameters and return */
#ifdef USE_ICONV

        /* ICONV uses a one step conversion */
        if (*to_length_p > outstr_length) {
            *to_length_p = outstr_length;
        }

        memcpy(to_string_p, outstr_p, *to_length_p);
        to_string_p[*to_length_p] = '\0';
        converted = outstr_length;
#elif USE_UCNV

        /* NOTE(review): ustr_length counts UChars but is used as a byte count
         * for the copy below - confirm that this is intended. */
        if (*to_length_p > (size_t)ustr_length) {
            *to_length_p = (size_t)ustr_length;
        }

        memcpy(to_string_p, ustr_p, *to_length_p);
        to_string_p[*to_length_p] = '\0';
        converted = ustr_length;
#endif
    } else {
#ifdef USE_UCNV
        res = ucnv_fromUChars(to_conv, (char *)to_string_p, (int32_t) * to_length_p, ustr_p, ustr_length, &errcode);

        /* Check for errors, ignore warnings */
        if (U_ZERO_ERROR != errcode && errcode > U_ERROR_WARNING_LIMIT) {
            CN_LOG_E("ucnv_fromUChars error code %d!", errcode);
            return -1;
        }

        if (*to_length_p > (size_t)res) {
            *to_length_p = (size_t)res;
        }

#ifdef CN_DEBUG_ENABLED
        {
            /* At most 30 octets, each printed as 2 hex digits plus a space. */
            char datastr[30 * 3 + 1] = { '\0' };
            size_t used = 0;
            int i;

            for (i = 0; i < 30 && i < res; i++) {
                int n = snprintf(datastr + used, sizeof(datastr) - used, "%02X ", (unsigned int)to_string_p[i]);

                if (n < 0 || (size_t)n >= sizeof(datastr) - used) {
                    break;
                }

                used += (size_t)n;
            }

            CN_LOG_D("ucnv_fromUChars(conv=%p, charset=%d, str=\"%s\") = %d",
                     (void *)to_conv, to_charset, datastr, res);
        }
#endif /* CN_DEBUG_ENABLED */

        converted = res;
#endif
    }

    /* Run final conversion, convert in-place */
    if (to_7_bit) {
        converted = str_convert_octet_to_septet(converted, to_string_p, converted, to_string_p);
    }

    return converted;
}
UErrorCode convert_to_utf8(const UChar* buffer, int32_t buffer_len, char** converted_buf, int32_t *converted_buf_len, bool force, bool* dropped_bytes) { UErrorCode status = U_ZERO_ERROR; UConverter *conv; int32_t utfConvertedLen = 0; // used to set dropped_bytes flag if force is true FromUFLAGContext * context = NULL; // open UTF8 converter conv = ucnv_open("utf-8", &status); if (U_FAILURE(status)) { ereport(WARNING, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), errmsg("Cannot open utf-8 converter - error: %s.\n", u_errorName(status)))); ucnv_close(conv); return status; } if (force) { // set callback to skip illegal, irregular or unassigned bytes // set converter to use SKIP callback // contecxt will save and call it after calling custom callback ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, NULL, &status); //TODO: refactor warning and error message reporting if (U_FAILURE(status)) { ereport(WARNING, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), errmsg("Cannot set callback on converter - error: %s.\n", u_errorName(status)))); ucnv_close(conv); return status; } // initialize flagging callback context = flagCB_fromU_openContext(); /* Set our special callback */ ucnv_setFromUCallBack(conv, flagCB_fromU, context, &(context->subCallback), &(context->subContext), &status ); if (U_FAILURE(status)) { ereport(WARNING, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), errmsg("Cannot set callback on converter - error: %s.\n", u_errorName(status)))); ucnv_close(conv); return status; } } // convert to UTF8 // input buffer from ucnv_toUChars, which always returns a // NUL-terminated buffer utfConvertedLen = ucnv_fromUChars(conv, *converted_buf, *converted_buf_len, buffer, STRING_IS_NULL_TERMINATED, &status ); if (U_SUCCESS(status)) { *converted_buf_len = utfConvertedLen; ereport(DEBUG1, (errcode(ERRCODE_SUCCESSFUL_COMPLETION), errmsg("Converted string: %s\n", (const char*) *converted_buf))); // see if any bytes where dropped // context struct will go away when 
converter is closed if (NULL != context) *dropped_bytes = context->flag; else *dropped_bytes = false; } if (U_FAILURE(status)) { ereport(WARNING, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), errmsg("ICU conversion from Unicode to UTF8 failed - error: %s.\n", u_errorName(status)))); } // close the converter ucnv_close(conv); return status; }
/**
 * Escape a UTF-16 string into a char buffer.
 *
 * Rules applied per source code unit:
 *   - '\n' becomes "\n" (backslash, 'n'); 0x0D becomes "\f" (backslash, 'f').
 *     NOTE(review): 0x0D is carriage return while "\f" names form feed;
 *     kept as is because the output format is not visible here.
 *   - '"' is prefixed with a backslash unless the preceding source unit is
 *     already a backslash.
 *   - A backslash is doubled before , ! ? # . % & : ; and at end of input,
 *     tripled before a quote or apostrophe, and copied as is otherwise.
 *   - Other printable ASCII (0x20..0x7E) is copied.
 *   - Everything else becomes \uXXXX when the file-level enc is empty or the
 *     unit is U+0000; otherwise it is converted with the file-level conv.
 *
 * The function mixes strcat-style appends with indexed stores that write no
 * terminator, so it relies on target being zero-filled by the caller.
 *
 * @param target     Output buffer (zero-filled, see above).
 * @param targetLen  Capacity of target.
 * @param source     UTF-16 input.
 * @param sourceLen  Number of code units in source.
 * @param status     ICU error code; set by ucnv_fromUChars on failure.
 * @return the number of chars the escaped form needs (output that does not
 *         fit is skipped but still counted), or 0 when a conversion fails.
 */
static int32_t
uCharsToChars(char *target, int32_t targetLen, UChar *source, int32_t sourceLen, UErrorCode *status)
{
    int i = 0, j = 0;
    char str[30] = {'\0'};

    while (i < sourceLen) {
        if (source[i] == '\n') {
            if (j + 2 < targetLen) {
                uprv_strcat(target, "\\n");
            }
            j += 2;
        } else if (source[i] == 0x0D) {
            if (j + 2 < targetLen) {
                uprv_strcat(target, "\\f");
            }
            j += 2;
        } else if (source[i] == '"') {
            /* there is no previous unit at i == 0; treat that as "not escaped yet" */
            const UChar prev = (i > 0) ? source[i - 1] : 0;

            if (prev != '\\') {
                if (j + 2 < targetLen) {
                    uprv_strcat(target, "\\");
                    target[j + 1] = (char)source[i];
                }
                j += 2;
            } else {
                /* the backslash(es) were already emitted; copy the quote itself */
                if (j < targetLen) {
                    target[j] = (char)source[i];
                }
                j++;
            }
        } else if (source[i] == '\\') {
            if (i + 1 < sourceLen) {
                switch (source[i + 1]) {
                case ',':
                case '!':
                case '?':
                case '#':
                case '.':
                case '%':
                case '&':
                case ':':
                case ';':
                    if (j + 2 < targetLen) {
                        uprv_strcat(target, "\\\\");
                    }
                    j += 2;
                    break;
                case '"':
                case '\'':
                    if (j + 3 < targetLen) {
                        uprv_strcat(target, "\\\\\\");
                    }
                    j += 3;
                    break;
                default:
                    if (j < targetLen) {
                        target[j] = (char)source[i];
                    }
                    j++;
                    break;
                }
            } else {
                /* two chars are appended, so require room for both like the other branches */
                if (j + 2 < targetLen) {
                    uprv_strcat(target, "\\\\");
                }
                j += 2;
            }
        } else if (source[i] >= 0x20 && source[i] < 0x7F /*ASCII*/) {
            if (j < targetLen) {
                target[j] = (char)source[i];
            }
            j++;
        } else {
            if (*enc == '\0' || source[i] == 0x0000) {
                uprv_strcpy(str, "\\u");
                itostr(str + 2, source[i], 16, 4);
                if (j + 6 < targetLen) {
                    uprv_strcat(target, str);
                }
                j += 6;
            } else {
                char dest[30] = {0};
                int retVal = ucnv_fromUChars(conv, dest, 30, source + i, 1, status);

                if (U_FAILURE(*status)) {
                    return 0;
                }
                if (j + retVal < targetLen) {
                    uprv_strcat(target, dest);
                }
                j += retVal;
            }
        }
        i++;
    }
    return j;
}
/* test invariant-character handling */ static void TestInvariant() { /* all invariant graphic chars and some control codes (not \n!) */ const char invariantChars[]= "\t\r \"%&'()*+,-./" "0123456789:;<=>?" "ABCDEFGHIJKLMNOPQRSTUVWXYZ_" "abcdefghijklmnopqrstuvwxyz"; const UChar invariantUChars[]={ 9, 0xd, 0x20, 0x22, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5f, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0 }; const char variantChars[]="\n!#$@[\\]^`{|}~"; const UChar variantUChars[]={ 0x0a, 0x21, 0x23, 0x24, 0x40, 0x5b, 0x5c, 0x5d, 0x5e, 0x60, 0x7b, 0x7c, 0x7d, 0x7e, 0 }; const UChar nonASCIIUChars[]={ 0x80, 0xa0, 0x900, 0xff51 }; UChar us[120]; char cs[120]; int32_t i, length; /* make sure that all invariant characters convert both ways */ length=sizeof(invariantChars); u_charsToUChars(invariantChars, us, length); if(u_strcmp(us, invariantUChars)!=0) { log_err("u_charsToUChars(invariantChars) failed\n"); } u_UCharsToChars(invariantUChars, cs, length); if(strcmp(cs, invariantChars)!=0) { log_err("u_UCharsToChars(invariantUChars) failed\n"); } /* * make sure that variant characters convert from source code literals to Unicode * but not back to char * */ length=sizeof(variantChars); u_charsToUChars(variantChars, us, length); if(u_strcmp(us, variantUChars)!=0) { log_err("u_charsToUChars(variantChars) failed\n"); } #ifdef NDEBUG /* * Test u_UCharsToChars(variantUChars) only in release mode because it will * cause an assertion failure in debug builds. 
*/ u_UCharsToChars(variantUChars, cs, length); for(i=0; i<length; ++i) { if(cs[i]!=0) { log_err("u_UCharsToChars(variantUChars) converted the %d-th character to %02x instead of 00\n", i, cs[i]); } } #endif /* * Verify that invariant characters roundtrip from Unicode to the * default converter and back. */ { UConverter *cnv; UErrorCode errorCode; errorCode=U_ZERO_ERROR; cnv=ucnv_open(NULL, &errorCode); if(U_FAILURE(errorCode)) { log_err("unable to open the default converter\n"); } else { length=ucnv_fromUChars(cnv, cs, sizeof(cs), invariantUChars, -1, &errorCode); if(U_FAILURE(errorCode)) { log_err("ucnv_fromUChars(invariantUChars) failed - %s\n", u_errorName(errorCode)); } else if(length!=sizeof(invariantChars)-1 || strcmp(cs, invariantChars)!=0) { log_err("ucnv_fromUChars(invariantUChars) failed\n"); } errorCode=U_ZERO_ERROR; length=ucnv_toUChars(cnv, us, LENGTHOF(us), invariantChars, -1, &errorCode); if(U_FAILURE(errorCode)) { log_err("ucnv_toUChars(invariantChars) failed - %s\n", u_errorName(errorCode)); } else if(length!=LENGTHOF(invariantUChars)-1 || u_strcmp(us, invariantUChars)!=0) { log_err("ucnv_toUChars(invariantChars) failed\n"); } ucnv_close(cnv); } } /* API tests */ if(!uprv_isInvariantString(invariantChars, -1)) { log_err("uprv_isInvariantString(invariantChars) failed\n"); } if(!uprv_isInvariantUString(invariantUChars, -1)) { log_err("uprv_isInvariantUString(invariantUChars) failed\n"); } if(!uprv_isInvariantString(invariantChars+strlen(invariantChars), 1)) { log_err("uprv_isInvariantString(\"\\0\") failed\n"); } for(i=0; i<(sizeof(variantChars)-1); ++i) { if(uprv_isInvariantString(variantChars+i, 1)) { log_err("uprv_isInvariantString(variantChars[%d]) failed\n", i); } if(uprv_isInvariantUString(variantUChars+i, 1)) { log_err("uprv_isInvariantUString(variantUChars[%d]) failed\n", i); } } for(i=0; i<LENGTHOF(nonASCIIUChars); ++i) { if(uprv_isInvariantUString(nonASCIIUChars+i, 1)) { log_err("uprv_isInvariantUString(nonASCIIUChars[%d]) failed\n", i); } } 
}
/// Encodes the string into a newly allocated array of bytes using the given encoding.
///
/// The result always ends with a null character of the encoding's width: when the
/// string does not already end with U+0000, one is appended and counted in the
/// output length (for the encodings handled below).
///
/// @param eEncoding      Encoding of the resulting bytes.
/// @param uOutputLength  Receives the size of the result in bytes; 0 when nothing is returned.
/// @return An array allocated with new[] that the caller must release with delete[], or
///         null when the string is empty or the conversion fails.
i8_q* QStringUnicode::ToBytes(const EQTextEncoding &eEncoding, unsigned int &uOutputLength) const
{
    i8_q* pOutputBytes = null_q;
    uOutputLength = 0;

    const unsigned int CHARACTERS_COUNT = m_strString.countChar32(); // It does not include the final null character

    if(CHARACTERS_COUNT > 0)
    {
        UErrorCode errorCode = U_ZERO_ERROR;
        UConverter* pConverter = QStringUnicode::GetConverter(eEncoding);
        const unsigned int CODE_UNITS_COUNT = m_strString.length(); // It does not include the final null character

        // Depending on whether the string is already null-terminated or not, a null terminator will be added at the end
        // of the resultant array of bytes. The last CODE UNIT is inspected: offsets into the string are expressed in
        // code units, so the code point count cannot be used as an index when the string contains surrogate pairs
        const unsigned int ADD_NULL_TERMINATION = m_strString.charAt(CODE_UNITS_COUNT - 1) == 0 ? 0 : 1;

        // By default, it is assigned as if it was to be encoded in ASCII or ISO 8859-1 (8-bits per character)
        int32_t nRequiredLengthBytes = CHARACTERS_COUNT + ADD_NULL_TERMINATION;

        // Output size calculation for Unicode encoding forms
        switch(eEncoding)
        {
        case EQTextEncoding::E_UTF8:
            // It is not possible to know in advance how much memory the UTF-8 will require
            // (each character could be represented by 1, 2, 3 or 4 8-bits code units) so we reserve the maximum it would need
            nRequiredLengthBytes = sizeof(i32_q) * (CHARACTERS_COUNT + ADD_NULL_TERMINATION);
            break;
        case EQTextEncoding::E_UTF16:
            // We already know the number of 16 bits code units. A BOM character is added at the beginning
            nRequiredLengthBytes = sizeof(i16_q) * (CODE_UNITS_COUNT + 1 + ADD_NULL_TERMINATION);
            break;
        case EQTextEncoding::E_UTF16BE:
        case EQTextEncoding::E_UTF16LE:
            // We already know the number of 16 bits code units
            nRequiredLengthBytes = sizeof(i16_q) * (CODE_UNITS_COUNT + ADD_NULL_TERMINATION);
            break;
        case EQTextEncoding::E_UTF32:
            // The width of UTF32 characters is always 32 bits. A BOM character is added at the beginning
            nRequiredLengthBytes = sizeof(i32_q) * (CHARACTERS_COUNT + 1 + ADD_NULL_TERMINATION);
            break;
        case EQTextEncoding::E_UTF32BE:
        case EQTextEncoding::E_UTF32LE:
            // The width of UTF32 characters is always 32 bits
            nRequiredLengthBytes = sizeof(i32_q) * (CHARACTERS_COUNT + ADD_NULL_TERMINATION);
            break;
        default:
            // 8-bits encodings keep the default size
            break;
        }

        // Conversion from native encoding (UTF16) to input encoding
        const UChar* pBuffer = m_strString.getBuffer();
        pOutputBytes = new char[nRequiredLengthBytes];

        ucnv_reset(pConverter);
        const int32_t nConvertedBytes = ucnv_fromUChars(pConverter, pOutputBytes, nRequiredLengthBytes, pBuffer, CODE_UNITS_COUNT, &errorCode);

        if(U_FAILURE(errorCode))
        {
            // On failure (e.g. buffer overflow) the returned length may exceed the capacity, so writing the
            // terminator at that position would fall outside the array
            delete[] pOutputBytes;
            return null_q;
        }

        uOutputLength = nConvertedBytes;

        // If it was necessary to add a null terminator...
        if(ADD_NULL_TERMINATION == 1)
        {
            // The last character has to be set to zero (ICU adds only 1 byte at the end as the null terminator)
            // The last character has to be added to the output length
            switch(eEncoding)
            {
            case EQTextEncoding::E_ASCII:
            case EQTextEncoding::E_ISO88591:
            case EQTextEncoding::E_UTF8:
                // 8 bits character
                uOutputLength += sizeof(i8_q);
                memset(&pOutputBytes[uOutputLength - sizeof(i8_q)], 0, sizeof(i8_q));
                break;
            case EQTextEncoding::E_UTF16:
            case EQTextEncoding::E_UTF16BE:
            case EQTextEncoding::E_UTF16LE:
                // 16 bits character
                uOutputLength += sizeof(i16_q);
                memset(&pOutputBytes[uOutputLength - sizeof(i16_q)], 0, sizeof(i16_q));
                break;
            case EQTextEncoding::E_UTF32:
            case EQTextEncoding::E_UTF32BE:
            case EQTextEncoding::E_UTF32LE:
                // 32 bits character
                uOutputLength += sizeof(i32_q);
                memset(&pOutputBytes[uOutputLength - sizeof(i32_q)], 0, sizeof(i32_q));
                break;
            default:
                // Any other encoding gets no explicit terminator
                break;
            }
        }
    }

    return pOutputBytes;
}
/**
 * Convert character vector between marked encodings and the encoding provided
 *
 * Each string is taken as UTF-16 and converted to the target encoding with
 * one shared buffer whose size is computed up front from the longest input.
 *
 * @param str     input character vector or list of raw vectors
 * @param to    target encoding, \code{NULL} or \code{""} for default enc
 * @param to_raw single logical, should list of raw vectors be returned?
 * @return a converted character vector or list of raw vectors;
 *         NA inputs give NA_STRING (or NULL elements when to_raw is true)
 *
 * @version 0.1-?? (Marek Gagolewski, 2013-11-12)
 *
 * @version 0.2-1 (Marek Gagolewski, 2014-03-28)
 *          use StriUcnv
 *
 * @version 0.2-1 (Marek Gagolewski, 2014-04-01)
 *          calc required buf size a priori
 *
 * @version 0.3-1 (Marek Gagolewski, 2014-11-04)
 *    Issue #112: str_prepare_arg* retvals were not PROTECTed from gc
 */
SEXP stri_encode_from_marked(SEXP str, SEXP to, SEXP to_raw)
{
    PROTECT(str = stri_prepare_arg_string(str, "str"));
    const char* selected_to = stri__prepare_arg_enc(to, "to", true); /* this is R_alloc'ed */
    bool to_raw_logical = stri__prepare_arg_logical_1_notNA(to_raw, "to_raw");

    STRI__ERROR_HANDLER_BEGIN(1)
    R_len_t str_n = LENGTH(str);
    StriContainerUTF16 str_cont(str, str_n);

    // get the number of strings to convert; if == 0, then you know what's the result
    if (str_n <= 0) {
        // str is still PROTECTed at this point; release it before leaving
        STRI__UNPROTECT_ALL
        return Rf_allocVector(to_raw_logical?VECSXP:STRSXP, 0);
    }

    // Open converters
    StriUcnv ucnv(selected_to);
    UConverter* uconv_to = ucnv.getConverter(true /*register_callbacks*/);

    // Get target encoding mark
    cetype_t encmark_to = to_raw_logical?CE_BYTES:ucnv.getCE();

    // Prepare out val
    SEXP ret;
    STRI__PROTECT(ret = Rf_allocVector(to_raw_logical?VECSXP:STRSXP, str_n));

    // calculate required buf size: longest input, in UTF-16 code units ...
    R_len_t bufsize = 0;
    for (R_len_t i=0; i<str_n; ++i) {
        if (!str_cont.isNA(i) && str_cont.get(i).length() > bufsize)
            bufsize = str_cont.get(i).length();
    }
    // ... scaled to the worst-case byte count of the target encoding
    bufsize = UCNV_GET_MAX_BYTES_FOR_STRING(bufsize, ucnv_getMaxCharSize(uconv_to));
    // "The calculated size is guaranteed to be sufficient for this conversion."
    String8buf buf(bufsize);

    for (R_len_t i=0; i<str_n; ++i) {
        if (str_cont.isNA(i)) {
            if (to_raw_logical) SET_VECTOR_ELT(ret, i, R_NilValue);
            else                SET_STRING_ELT(ret, i, NA_STRING);
            continue;
        }

        R_len_t curn_tmp = str_cont.get(i).length();
        const UChar* curs_tmp = str_cont.get(i).getBuffer(); // The buffer content is (probably) not NUL-terminated.
        if (!curs_tmp)
            throw StriException(MSG__INTERNAL_ERROR);

        UErrorCode status = U_ZERO_ERROR;
        ucnv_resetFromUnicode(uconv_to);
        R_len_t bufneed = ucnv_fromUChars(uconv_to, buf.data(), buf.size(),
                                          curs_tmp, curn_tmp, &status);
        if (bufneed <= buf.size()) {
            STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})
        }
        else {// larger buffer needed [this shouldn't happen?]
            buf.resize(bufneed, false/*destroy contents*/);
            status = U_ZERO_ERROR;
            bufneed = ucnv_fromUChars(uconv_to, buf.data(), buf.size(),
                                      curs_tmp, curn_tmp, &status);
            STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})
        }

        if (to_raw_logical) {
            SEXP outobj;
            STRI__PROTECT(outobj = Rf_allocVector(RAWSXP, bufneed));
            memcpy(RAW(outobj), buf.data(), (size_t)bufneed);
            SET_VECTOR_ELT(ret, i, outobj);
            STRI__UNPROTECT(1);
        }
        else {
            SET_STRING_ELT(ret, i,
                           Rf_mkCharLenCE(buf.data(), bufneed, encmark_to));
        }
    }

    STRI__UNPROTECT_ALL
    return ret;

    STRI__ERROR_HANDLER_END({/* nothing special on error */})
}
const std::string convU16StrToCharStr(const U16Char_t* src, const char* Encoding) { //static char const* const tocode = CHARCONV_ICONV_UTF16; char const* const tocode = getPlatformEncoding(Encoding); UErrorCode status = U_ZERO_ERROR; #ifdef ENCCONV_DEBUG std::cout << "\t" "convString" << std::endl; std::cout << "\t\t" "tocode = " << tocode << std::endl; //std::cout << "\t\t" "fromcode = " << fromcode << std::endl; #endif //iconv_t cd = iconv_open(tocode, fromcode); // Initializing ICU converter UConverter *conv = ucnv_open(tocode, &status); #ifdef CHARCONV_DEBUG std::cout << "\t\t" "aft ucnv_open: status = " << status << std::endl; #endif if (conv == NULL) { // try default encoding "ISO-8859-1" //throw std::runtime_error("Unable to create Unicode converter object"); status = U_ZERO_ERROR; conv = ucnv_open("ISO-8859-1", &status); } //still if conv is null simply return blank string if (conv == NULL) { return std::string(""); } U16Char_t const* srcWrk = src; const size_t srcSizeInUnits = GetNumOfUnits(src); const size_t srcSizeInBytes = srcSizeInUnits * sizeof(U16Char_t); const size_t dstSizeInBytes = MAX(256, (srcSizeInUnits + 1)) * 4; // How much byte buffer is needed? 
(UTF16 --> MBCS) char* dst = new char [dstSizeInBytes]; if(dst==NULL) return std::string(""); char* dstWrk =(char*)(dst); size_t srcLeftInBytes = srcSizeInBytes; size_t dstLeftInBytes = dstSizeInBytes - sizeof(char); status = U_ZERO_ERROR; ucnv_fromUChars(conv, dstWrk, dstLeftInBytes, (UChar*)srcWrk, -1, &status); U16Char_t* reverseConvertedVal = convCharStrToU16Str(dstWrk,Encoding); if(strcmp((char*)reverseConvertedVal,(char*)src)!=0) { EncConv::releaseU16Str(reverseConvertedVal); delete[] dst; return std::string(""); } EncConv::releaseU16Str(reverseConvertedVal); #ifdef CHARCONV_DEBUG std::cout << "\t\t" "aft iconv: status = " << status << std::endl; #endif if (status != U_ZERO_ERROR ) { // throw std::runtime_error("Unable to convert to string"); *dstWrk = 0; } std::string dst2(dst); delete[] dst; //const int err = iconv_close(cd); ucnv_close(conv); //if (err == -1) // throw std::runtime_error("Unable to deallocate iconv_t object"); return dst2; }
/*
 * SQL function: transform a TEXT argument to the internal encoding with the
 * converter stored in the function's user data, encrypt the bytes with
 * do_rc4() and return them as a BLOB.
 *
 * - Exactly one argument is accepted; anything else, or a BLOB argument,
 *   yields SQLITE_MISUSE.
 * - Arguments that are neither TEXT nor BLOB are returned unchanged.
 * - A conversion failure yields SQLITE_ERROR, an allocation failure NOMEM.
 */
static void mmenc_func(sqlite3_context *db, int argc, sqlite3_value **argv)
{
    mm_cipher_context_t *ctx;
    const UChar *src;
    int32_t src_len;
    char buf[1024];
    char *dst = buf;
    int32_t dst_len;
    UErrorCode status = U_ZERO_ERROR;
    int arg_type;

    /* only accept 1 argument. */
    if (argc != 1)
        goto error_misuse;

    /* encoding BLOB data type is not supported. */
    arg_type = sqlite3_value_type(argv[0]);
    if (arg_type == SQLITE_BLOB)
        goto error_misuse;

    /* for data types other than TEXT, just return them. */
    if (arg_type != SQLITE_TEXT) {
        sqlite3_result_value(db, argv[0]);
        return;
    }

    ctx = (mm_cipher_context_t *) sqlite3_user_data(db);

    /*
     * Fetch the text first and its size second: this is the call order
     * SQLite documents as safe for the pointer/length pair.
     */
    src = (const UChar *) sqlite3_value_text16(argv[0]);
    if (src == NULL) {
        sqlite3_result_error_nomem(db);
        return;
    }
    src_len = sqlite3_value_bytes16(argv[0]) / 2;

    /*
     * transform input string to BOCU-1 encoding.
     * try stack buffer first, if it doesn't fit, malloc a new buffer.
     */
    dst_len = ucnv_fromUChars(ctx->cnv, dst, sizeof(buf), src, src_len, &status);
    if (status == U_BUFFER_OVERFLOW_ERROR) {
        /* on overflow the return value is the size that is actually needed */
        status = U_ZERO_ERROR;
        dst = (char *) sqlite3_malloc(dst_len);
        if (dst == NULL) {
            sqlite3_result_error_nomem(db);
            return;
        }
        dst_len = ucnv_fromUChars(ctx->cnv, dst, dst_len, src, src_len, &status);
    }
    if (U_FAILURE(status) && status != U_STRING_NOT_TERMINATED_WARNING) {
        sqlite3_mm_set_last_error(
            "Failed transforming text to internal encoding.");
        goto error_error;
    }

    /* encrypt transformed BOCU-1 string. */
    do_rc4(ctx, dst, dst_len);

    /* return; SQLITE_TRANSIENT makes SQLite take its own copy of dst */
    sqlite3_result_blob(db, dst, dst_len, SQLITE_TRANSIENT);
    if (dst != buf)
        sqlite3_free(dst);
    return;

error_error:
    if (dst != buf)
        sqlite3_free(dst);
    sqlite3_result_error_code(db, SQLITE_ERROR);
    return;

error_misuse:
    if (dst != buf)
        sqlite3_free(dst);
    sqlite3_result_error_code(db, SQLITE_MISUSE);
    return;
}
/*
 * Forwarding shim around ucnv_fromUChars(): every argument is handed through
 * unchanged and the callee's return value is passed back as is.
 */
int32_t __hs_ucnv_fromUChars(UConverter *cnv, char *dest, int32_t destCapacity,
                             const UChar *src, int32_t srcLength,
                             UErrorCode *pErrorCode)
{
    const int32_t rc = ucnv_fromUChars(cnv, dest, destCapacity,
                                       src, srcLength, pErrorCode);

    return rc;
}