int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar, LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar) { int length; BYTE* targetStart; const WCHAR* sourceStart; ConversionResult result; /* If cchWideChar is 0, the function fails */ if (cchWideChar == 0) return 0; /* If cchWideChar is -1, the string is null-terminated */ if (cchWideChar == -1) cchWideChar = _wcslen(lpWideCharStr) + 1; /* * if cbMultiByte is 0, the function returns the required buffer size * in bytes for lpMultiByteStr and makes no use of the output parameter itself. */ if (cbMultiByte == 0) { sourceStart = (WCHAR*) lpWideCharStr; targetStart = (BYTE*) NULL; result = ConvertUTF16toUTF8(&sourceStart, &sourceStart[cchWideChar], &targetStart, NULL, strictConversion); length = targetStart - ((BYTE*) NULL); cbMultiByte = length; } else { sourceStart = (WCHAR*) lpWideCharStr; targetStart = (BYTE*) lpMultiByteStr; result = ConvertUTF16toUTF8(&sourceStart, &sourceStart[cchWideChar], &targetStart, &targetStart[cbMultiByte], strictConversion); length = targetStart - ((BYTE*) lpMultiByteStr); cbMultiByte = length; } return cbMultiByte; }
std::string * keymagic_driver::U16toU8(const std::wstring& u16) { unsigned int length = u16.length() * 2; length++; const UTF16 * source = (UTF16*)u16.c_str(); const UTF16 * sourceStart = source; const UTF16 * sourceEnd = source + length; UTF8 * target = new UTF8[length]; UTF8 * targetStart = target; UTF8 * targetEnd = target + length; ConvertUTF16toUTF8(&sourceStart, sourceEnd, &targetStart, targetEnd, lenientConversion); char * cs = new char[length]; memset(cs, 0, length * sizeof (wchar_t)); for (int i = 0; i < targetStart - target; i++) { cs[i] = target[i]; } std::string * s = new std::string(cs); delete[] cs; delete[] target; return s; }
// Converts a wide string to UTF-8.
// On WIN32 the input is treated as UTF-16, elsewhere as UTF-32.
// Returns an empty string for empty input. If the conversion fails, returns a
// diagnostic prefix followed by the raw input bytes instead of throwing.
AnsiString WStoUTF8(const std::wstring& s){
	AnsiString Value;
	if (s.size()==0)
	{
		return Value;
	}

	// Worst case output: 3 bytes per UTF-16 unit, 4 bytes per UTF-32 code point.
#ifdef WIN32
	const int32 maxBytesPerUnit = 3;
#else
	const int32 maxBytesPerUnit = 4;
#endif
	int32 len = maxBytesPerUnit * s.size() + 1;
	Value.resize(len);
	UTF8* DestStart = (UTF8*)(&Value[0]);
	UTF8* DestEnd = DestStart + len;

#ifdef WIN32
	const UTF16* Start = (const UTF16*)s.c_str();
	const UTF16* End = Start + s.size();
	ConversionResult ret = ConvertUTF16toUTF8(&Start,End,&DestStart,DestEnd, strictConversion);
#else
	const UTF32* Start = (const UTF32*)s.c_str();
	const UTF32* End = Start + s.size();
	ConversionResult ret = ConvertUTF32toUTF8(&Start,End,&DestStart,DestEnd, strictConversion);
#endif

	if (ret != conversionOK)
	{
		// Throwing from a utility function like this (previously:
		// throw std::exception("UFT8 Convert Fail.")) would only be a nuisance to the
		// host program, so return a hint plus possibly garbled text instead.
		Value = "UFT16TO8 Convert Fail:";
		return Value += (const char*)s.c_str();
	}

	// Rebuilding from the C string drops the unused tail of the buffer.
	return Value.c_str();
}
inline int wchar_utf8(const std::wstring &wide, std::string &utf8) { // allocate space for worst-case utf8.resize(wide.size() * 6); if (wide.empty()) return 0; char* dst_start = &utf8[0]; wchar_t const* src_start = wide.c_str(); ConversionResult ret; if (sizeof(wchar_t) == sizeof(UTF32)) { ret = ConvertUTF32toUTF8((const UTF32**)&src_start, (const UTF32*)src_start + wide.size(), (UTF8**)&dst_start, (UTF8*)dst_start + utf8.size() , lenientConversion); utf8.resize(dst_start - &utf8[0]); return ret; } else if (sizeof(wchar_t) == sizeof(UTF16)) { ret = ConvertUTF16toUTF8((const UTF16**)&src_start, (const UTF16*)src_start + wide.size(), (UTF8**)&dst_start, (UTF8*)dst_start + utf8.size() , lenientConversion); utf8.resize(dst_start - &utf8[0]); return ret; } else { return sourceIllegal; } }
//------------------------------------------------------------------------------ // String16ToUTF8 //------------------------------------------------------------------------------ bool String16ToUTF8(const char16 *in, int len, std::string *out8) { assert(in); assert(len >= 0); assert(out8); if (len <= 0) { *out8 = ""; return true; } const UTF16 *source_ptr = reinterpret_cast<const UTF16*>(in); const UTF16 *source_end_ptr = source_ptr + len; // should point 'beyond last' // UTF8 string has at most 4 times as many 'characters' as UTF16 one. if (len > INT_MAX / 4) { // overflow check *out8 = ""; return false; } int out_len = len * 4; out8->resize(out_len); UTF8 *target_ptr = reinterpret_cast<UTF8*>(&(*out8)[0]); UTF8 *target_ptr_original = target_ptr; UTF8 *target_end_ptr = target_ptr + out_len; ConversionResult result = ConvertUTF16toUTF8(&source_ptr, source_end_ptr, &target_ptr, target_end_ptr, strictConversion); // Resize to be the size of the # of converted characters. // Note that stl strings always account for \0 end-of-line character // automatically, so no need to do "+1" here. out8->resize(result == conversionOK ? target_ptr - target_ptr_original : 0); return result == conversionOK; }
/* C Strings {{{ */
// Converts a UTF-16 string to a NUL-terminated UTF-8 string allocated from `pool`
// (lenient conversion). The returned CYUTF8String refers to the pool buffer and
// its length excludes the terminator.
CYUTF8String CYPoolUTF8String(CYPool &pool, CYUTF16String utf16) {
    // A UTF-16 code unit produces at most three UTF-8 bytes; one extra byte is
    // reserved for the terminator so that empty input still has room for it.
    size_t capacity(utf16.size * 3);
    char *temp(new(pool) char[capacity + 1]);

    const uint16_t *lhs(utf16.data);
    uint8_t *rhs(reinterpret_cast<uint8_t *>(temp));

    // Run the conversion outside of _assert so it does not depend on how that
    // macro is defined.
    const bool converted(ConvertUTF16toUTF8(&lhs, lhs + utf16.size, &rhs, rhs + capacity, lenientConversion) == conversionOK);
    _assert(converted);

    *rhs = 0;
    return CYUTF8String(temp, reinterpret_cast<char *>(rhs) - temp);
}
// Convert a UTF16 string into a UTF8 representation using the Unicode.org // supplied C algorithms, which are now contained within the ANTLR3 C runtime // as permitted by the Unicode license (within the source code antlr3convertutf.c/.h // UCS2 has the same encoding as UTF16 so we can use UTF16 converter. // static pANTLR3_STRING toUTF8_UTF16 (pANTLR3_STRING string) { UTF8 * outputEnd; UTF16 * inputEnd; pANTLR3_STRING utf8String; ConversionResult cResult; // Allocate the output buffer, which needs to accommodate potentially // 3X (in bytes) the input size (in chars). // utf8String = string->factory->newStr8(string->factory, (pANTLR3_UINT8)""); if (utf8String != NULL) { // Free existing allocation // ANTLR3_FREE(utf8String->chars); // Reallocate according to maximum expected size // utf8String->size = string->len *3; utf8String->chars = (pANTLR3_UINT8)ANTLR3_MALLOC(utf8String->size +1); if (utf8String->chars != NULL) { inputEnd = (UTF16 *) (string->chars); outputEnd = (UTF8 *) (utf8String->chars); // Call the Unicode converter // cResult = ConvertUTF16toUTF8 ( (const UTF16**)&inputEnd, ((const UTF16 *)(string->chars)) + string->len, &outputEnd, outputEnd + utf8String->size - 1, lenientConversion ); // We don't really care if things failed or not here, we just converted // everything that was vaguely possible and stopped when it wasn't. It is // up to the grammar programmer to verify that the input is sensible. // utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputEnd) - utf8String->chars); *(outputEnd+1) = '\0'; // Always null terminate } } return utf8String; }
// Strictly converts `length` wide characters from `str` into UTF-8 stored in
// `outBuffer`. The buffer is cleared first. Returns the number of UTF-8 bytes
// written, or 0 when `length` is 0 or the conversion fails.
intp StringParser::ConvertToBuffer(const wchar_t* str, size_t length, HeapString& outBuffer)
{
	outBuffer.Clear();
	if (length == 0)
	{
		return 0;
	}

	constexpr bool wideIsUTF16 = sizeof(wchar_t) == 2;

	// Worst case: 3 bytes per UTF-16 unit or 4 per UTF-32 unit, plus a terminator.
	const size_t capacity = length * (wideIsUTF16 ? 3 : 4) + 1;
	outBuffer.ReserveSize(capacity);

	UTF8* writeCursor = reinterpret_cast<UTF8*>(outBuffer.MutableBuffer());
	UTF8* const writeLimit = writeCursor + capacity;

	ConversionResult status;
	if (wideIsUTF16)
	{
		const UTF16* readCursor = reinterpret_cast<const UTF16*>(str);
		const UTF16* const readLimit = readCursor + length;
		status = ConvertUTF16toUTF8(&readCursor, readLimit, &writeCursor, writeLimit, strictConversion);
	}
	else
	{
		const UTF32* readCursor = reinterpret_cast<const UTF32*>(str);
		const UTF32* const readLimit = readCursor + length;
		status = ConvertUTF32toUTF8(&readCursor, readLimit, &writeCursor, writeLimit, strictConversion);
	}

	// Terminate whatever was produced, even when the conversion failed.
	*writeCursor = 0;

	if (status != conversionOK)
	{
		return 0;
	}

	const intp written = writeCursor - reinterpret_cast<UTF8*>(outBuffer.MutableBuffer());
	outBuffer.ForceSetLength(written);
	return written;
}
// Leniently converts the UTF-16 range [*src_start, src_end) into the storage
// already present in `utf8`, then shrinks `utf8` to the bytes written.
// `*src_start` is advanced by the converter. The converter's result code is
// returned as a utf8_errors::error_code_enum.
static utf8_errors::error_code_enum convert(wchar_t const** src_start
	, wchar_t const* src_end
	, std::string& utf8)
{
	char* out_cursor = &utf8[0];
	UTF8* const out_limit = reinterpret_cast<UTF8*>(out_cursor + utf8.size());

	UTF16 const** const in_cursor = reinterpret_cast<UTF16 const**>(src_start);
	UTF16 const* const in_limit = reinterpret_cast<UTF16 const*>(src_end);

	int const status = ConvertUTF16toUTF8(in_cursor, in_limit
		, reinterpret_cast<UTF8**>(&out_cursor), out_limit
		, lenientConversion);

	std::ptrdiff_t const written = out_cursor - &utf8[0];
	utf8.resize(aux::numeric_cast<std::size_t>(written));
	return static_cast<utf8_errors::error_code_enum>(status);
}
// Strictly converts a wide string to UTF-8, treating the input as UTF-16 when
// wchar_t is 2 bytes and as UTF-32 when it is 4 bytes.
// Throws Exception(ERROR_WIDE_2_UTF8) when the conversion fails or when wchar_t
// has any other width.
String StringUtils::wideString2utf8String( const WideString& wideString )
{
	const bool wideIs16 = ( sizeof( wchar_t ) == 2 );
	const bool wideIs32 = ( sizeof( wchar_t ) == 4 );

	if ( !wideIs16 && !wideIs32 )
	{
		throw Exception(Exception::ERROR_WIDE_2_UTF8, String("Could not convert from wide string to UTF8."));
	}

	const size_t unitCount = wideString.length();
	const size_t capacity = MAX_UTF8_CHAR_LENGTH * unitCount + 1;

	String utf8;
	utf8.resize( capacity, '\0' );

	UTF8* const outBegin = reinterpret_cast<UTF8*>( &utf8[ 0 ] );
	UTF8* outCursor = outBegin;
	UTF8* const outLimit = outBegin + capacity;

	ConversionResult status;
	if ( wideIs16 )
	{
		const UTF16* inCursor = reinterpret_cast<const UTF16*>( wideString.c_str() );
		status = ConvertUTF16toUTF8( &inCursor, inCursor + unitCount, &outCursor, outLimit, strictConversion );
	}
	else
	{
		const UTF32* inCursor = reinterpret_cast<const UTF32*>( wideString.c_str() );
		status = ConvertUTF32toUTF8( &inCursor, inCursor + unitCount, &outCursor, outLimit, strictConversion );
	}

	if ( status != conversionOK )
	{
		throw Exception(Exception::ERROR_WIDE_2_UTF8, String("Could not convert from wide string to UTF8."));
	}

	// Drop the unused part of the worst-case buffer.
	utf8.resize( outCursor - outBegin );
	return utf8;
}
// Strictly converts `len` wide characters starting at `wstr` to UTF-8.
// Only a UTF-16 sized wchar_t is handled; for any other width, and for
// len == 0, an empty string is returned. If the conversion stops early, the
// bytes produced up to that point are returned.
inline String w2u(const wchar_t* wstr, size_t len) {
	String result;

	// Previously an unsupported wchar_t width returned a buffer full of NUL
	// bytes; return a genuinely empty string instead.
	if (sizeof(wchar_t) != sizeof(UTF16) || len == 0) {
		return result;
	}

	result.resize(len * AX_UTF8_LEN_MAX);

	const UTF16* srcstart = (const UTF16*)wstr;
	const UTF16* srcend = srcstart + len;
	UTF8* dststart = (UTF8*)&result[0];
	UTF8* dstend = dststart + result.size();

	ConvertUTF16toUTF8(&srcstart, srcend, &dststart, dstend, strictConversion);

	// dststart was advanced past the last byte written.
	result.resize(dststart - (UTF8*)&result[0]);
	return result;
}
// Strictly converts a byte buffer holding UTF-16 text into UTF-8 in `Out`,
// which must be empty on entry. A byte-swapped BOM triggers byte swapping of
// the whole input; a native BOM is skipped. Returns false for an odd byte
// count or a failed conversion (leaving `Out` empty), true otherwise.
bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
  assert(Out.empty());

  // An odd number of bytes cannot be a whole number of UTF-16 units.
  if (SrcBytes.size() % 2 != 0)
    return false;

  // Nothing to convert; this also keeps the reads of the first unit in bounds.
  if (SrcBytes.empty())
    return true;

  const UTF16 *Cursor = reinterpret_cast<const UTF16 *>(SrcBytes.begin());
  const UTF16 *Limit = reinterpret_cast<const UTF16 *>(SrcBytes.end());

  // Work on a byte-swapped copy when the BOM says the input has the other
  // endianness.
  std::vector<UTF16> Swapped;
  if (*Cursor == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) {
    Swapped.assign(Cursor, Limit);
    for (size_t Idx = 0; Idx != Swapped.size(); ++Idx)
      Swapped[Idx] = llvm::sys::SwapByteOrder_16(Swapped[Idx]);
    Cursor = &Swapped[0];
    Limit = Cursor + Swapped.size();
  }

  // The BOM itself is not part of the converted text.
  if (*Cursor == UNI_UTF16_BYTE_ORDER_MARK_NATIVE)
    ++Cursor;

  // Over-allocate now and shrink afterwards; the extra byte leaves room for a
  // null terminator without reallocating.
  Out.resize(SrcBytes.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1);
  UTF8 *OutCursor = reinterpret_cast<UTF8 *>(&Out[0]);
  UTF8 *const OutLimit = OutCursor + Out.size();

  const ConversionResult Status =
      ConvertUTF16toUTF8(&Cursor, Limit, &OutCursor, OutLimit, strictConversion);
  assert(Status != targetExhausted);

  if (Status != conversionOK) {
    Out.clear();
    return false;
  }

  Out.resize(reinterpret_cast<char *>(OutCursor) - &Out[0]);
  // Append and remove a zero byte so a terminator sits right after the data.
  Out.push_back(0);
  Out.pop_back();
  return true;
}
static std::string ToUtf8(const std::wstring& widestring) { size_t widesize = widestring.length(); if (sizeof(wchar_t) == 2) { size_t utf8size = 3 * widesize + 1; char* utf8stringnative = new char[utf8size]; const UTF16* sourcestart = reinterpret_cast<const UTF16*>(widestring.c_str()); const UTF16* sourceend = sourcestart + widesize; UTF8* targetstart = reinterpret_cast<UTF8*>(utf8stringnative); UTF8* targetend = targetstart + utf8size; ConversionResult res = ConvertUTF16toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion); if (res != conversionOK) { delete [] utf8stringnative; throw std::exception(); } *targetstart = 0; std::string resultstring(utf8stringnative); delete [] utf8stringnative; return resultstring; } else if (sizeof(wchar_t) == 4) { size_t utf8size = 4 * widesize + 1; char* utf8stringnative = new char[utf8size]; const UTF32* sourcestart = reinterpret_cast<const UTF32*>(widestring.c_str()); const UTF32* sourceend = sourcestart + widesize; UTF8* targetstart = reinterpret_cast<UTF8*>(utf8stringnative); UTF8* targetend = targetstart + utf8size; ConversionResult res = ConvertUTF32toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion); if (res != conversionOK) { delete [] utf8stringnative; throw std::exception(); } *targetstart = 0; std::string resultstring(utf8stringnative); delete [] utf8stringnative; return resultstring; } else { throw std::exception(); } return ""; }
std::string ToUtf8(const std::wstring& widestring) { size_t widesize = widestring.length(); if (sizeof(wchar_t) == 2) { size_t utf8size = 3 * widesize + 1; std::string resultstring; resultstring.resize(utf8size, '\0'); const UTF16* sourcestart = reinterpret_cast<const UTF16*>(widestring.c_str()); const UTF16* sourceend = sourcestart + widesize; UTF8* targetstart = reinterpret_cast<UTF8*>(&resultstring[0]); UTF8* targetend = targetstart + utf8size; ConversionResult res = ConvertUTF16toUTF8 (&sourcestart, sourceend, &targetstart, targetend, strictConversion); if (res != conversionOK) { throw std::exception("La falla!"); } *targetstart = 0; return resultstring; } else if (sizeof(wchar_t) == 4) { size_t utf8size = 4 * widesize + 1; std::string resultstring; resultstring.resize(utf8size, '\0'); const UTF32* sourcestart = reinterpret_cast<const UTF32*>(widestring.c_str()); const UTF32* sourceend = sourcestart + widesize; UTF8* targetstart = reinterpret_cast<UTF8*>(&resultstring[0]); UTF8* targetend = targetstart + utf8size; ConversionResult res = ConvertUTF32toUTF8 (&sourcestart, sourceend, &targetstart, targetend, strictConversion); if (res != conversionOK) { throw std::exception("La falla!"); } *targetstart = 0; return resultstring; } else { throw std::exception("La falla!"); } return ""; }
// void System.Console.WriteLine(string) extern "C" void System_Console__WriteLine_System_String_(String* str) { if (str == NULL) printf("\n"); else { //printf("%.*s\n", str->length, str->value); uint32_t bufferLength = str->length * UNI_MAX_UTF8_BYTES_PER_CODE_POINT; uint8_t* buffer = (uint8_t*)malloc(bufferLength); const uint16_t* src = (const uint16_t*)str->value; uint8_t* dest = buffer; ConvertUTF16toUTF8(&src, src + str->length, &dest, dest + bufferLength, strictConversion); printf("%.*s\n", dest - buffer, buffer); free(buffer); } }
bool convertUTF16ToUTF8String(const CCWideString& utf16, std::string &Out) { assert(Out.empty()); // Avoid OOB by returning early on empty input. if (utf16.empty()) return true; const UTF16 *Src = reinterpret_cast<const UTF16 *>(utf16.data()); const UTF16 *SrcEnd = reinterpret_cast<const UTF16 *>(utf16.data() + utf16.length()); // Byteswap if necessary. std::vector<UTF16> ByteSwapped; if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) { ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd); for (size_t I = 0, E = ByteSwapped.size(); I != E; ++I) ByteSwapped[I] = SwapByteOrder_16(ByteSwapped[I]); Src = &ByteSwapped[0]; SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1; } // Skip the BOM for conversion. if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_NATIVE) Src++; // Just allocate enough space up front. We'll shrink it later. Out.resize(utf16.length() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1); UTF8 *Dst = reinterpret_cast<UTF8 *>(&Out[0]); UTF8 *DstEnd = Dst + Out.size(); ConversionResult CR = ConvertUTF16toUTF8(&Src, SrcEnd, &Dst, DstEnd, strictConversion); assert(CR != targetExhausted); if (CR != conversionOK) { #if defined(_MSC_VER) && _MSC_VER <= 1200 Out.erase(); #else Out.clear(); #endif return false; } Out.resize(reinterpret_cast<char *>(Dst) - &Out[0]); return true; }
std::string StringUtils::wstring_To_Utf8(const std::wstring& widestring) { size_t widesize = widestring.length(); if (sizeof(wchar_t) == 2) { size_t utf8size = 3 * widesize + 1; std::string resultstring; resultstring.resize(utf8size, '\0'); const UTF16* sourcestart = reinterpret_cast<const UTF16*>(widestring.c_str()); const UTF16* sourceend = sourcestart + widesize; UTF8* targetstart = reinterpret_cast<UTF8*>(&resultstring[0]); UTF8* targetend = targetstart + utf8size; ConversionResult res = ConvertUTF16toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion); if (res != conversionOK) { return std::string(widestring.begin(), widestring.end()); } *targetstart = 0; return std::string(resultstring.c_str()); } else if (sizeof(wchar_t) == 4) { size_t utf8size = 4 * widesize + 1; std::string resultstring; resultstring.resize(utf8size, '\0'); const UTF32* sourcestart = reinterpret_cast<const UTF32*>(widestring.c_str()); const UTF32* sourceend = sourcestart + widesize; UTF8* targetstart = reinterpret_cast<UTF8*>(&resultstring[0]); UTF8* targetend = targetstart + utf8size; ConversionResult res = ConvertUTF32toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion); if (res != conversionOK) { return std::string(widestring.begin(), widestring.end()); } *targetstart = 0; return std::string(resultstring.c_str()); } else { assert(false); } return ""; }
// Convenience overload: converts a whole WString to UTF-8 by forwarding its
// character data and length to the pointer/length overload of w2u.
String w2u(const WString& wstr) {
	const size_t count = wstr.size();
	return w2u(wstr.c_str(), count);
}
// Reads a zero-terminated UTF-16 string from the remaining bytes of `bytes`
// and returns it converted to UTF-8. Returns "" when no terminator is found
// within the remaining data or when the converter reports failure.
string X3fPropertyCollection::getString( ByteStream *bytes ) {
  uint32 max_len = bytes->getRemainSize() / 2;
  const UTF16* start = (const UTF16*)bytes->getData();
  const UTF16* src_end = start;

  // Scan for the terminating zero unit. The loop stops one step after src_end
  // moves away from start, so i ends up one past the terminator's index.
  uint32 i = 0;
  for (; i < max_len && start == src_end; i++) {
    if (start[i] == 0) {
      src_end = &start[i];
    }
  }

  if (start == src_end)
    return "";

  // Zero-filled output buffer owned by a string, so it is released on every path.
  string buffer(i * 4 + 1, '\0');
  UTF8* const dest_begin = (UTF8*)&buffer[0];

  // Hand the converter its own cursor. The buffer start must stay untouched
  // because the result is read from it afterwards, and the converter may
  // advance the cursor it is given.
  UTF8* dest_cursor = dest_begin;

  // NOTE(review): the result is used as a truth value, which presumes the
  // converter in scope returns non-zero on success -- confirm against its
  // declaration.
  if (ConvertUTF16toUTF8(&start, src_end, &dest_cursor, dest_begin + (i * 4 - 1))) {
    return string(buffer.c_str());
  }
  return "";
}
const bool ToUTF8(const std::wstring &wcstring, std::string &utf8string) { if(wcstring.size()==0) { utf8string.assign(""); return true; } std::vector<std::wstring::value_type> source(wcstring.begin(),wcstring.end()); if(sizeof(std::wstring::value_type)==2 && sizeof(UTF16)==2) { std::vector<std::string::value_type> dest(wcstring.size()*2,0); const UTF16 *sourcestart=reinterpret_cast<const UTF16 *>(&source[0]); const UTF16 *sourceend=sourcestart+source.size(); UTF8 *deststart=reinterpret_cast<UTF8 *>(&dest[0]); UTF8 *destend=deststart+dest.size(); ConversionResult rval=ConvertUTF16toUTF8(&sourcestart,sourceend,&deststart,destend,lenientConversion); if(rval!=conversionOK) { return false; } utf8string.assign(dest.begin(),dest.end()-(destend-deststart)); } else if(sizeof(std::wstring::value_type)==4 && sizeof(UTF32)==4) { std::vector<std::string::value_type> dest(wcstring.size()*4,0); const UTF32 *sourcestart=reinterpret_cast<const UTF32 *>(&source[0]); const UTF32 *sourceend=sourcestart+source.size(); UTF8 *deststart=reinterpret_cast<UTF8 *>(&dest[0]); UTF8 *destend=deststart+dest.size(); ConversionResult rval=ConvertUTF32toUTF8(&sourcestart,sourceend,&deststart,destend,lenientConversion); if(rval!=conversionOK) { return false; } utf8string.assign(dest.begin(),dest.end()-(destend-deststart)); } else { std::vector<UTF32> source2(wcstring.begin(),wcstring.end()); std::vector<std::string::value_type> dest(wcstring.size()*sizeof(std::wstring::value_type),0); const UTF32 *sourcestart=reinterpret_cast<const UTF32 *>(&source2[0]); const UTF32 *sourceend=sourcestart+source2.size(); UTF8 *deststart=reinterpret_cast<UTF8 *>(&dest[0]); UTF8 *destend=deststart+dest.size(); ConversionResult rval=ConvertUTF32toUTF8(&sourcestart,sourceend,&deststart,destend,lenientConversion); if(rval!=conversionOK) { return false; } utf8string.assign(dest.begin(),dest.end()-(destend-deststart)); } return true; }
/*
 * Decodes the four hex digits at the end of the parse buffer into a UTF-16
 * unit and replaces the last six characters of the parse buffer with the
 * UTF-8 encoding of the accumulated UTF-16 unit(s).
 *
 * Returns true on success, and also when a single unit could not be converted
 * yet because the converter reported sourceExhausted; in that case the unit
 * stays in utf16_decode_buffer[0] so the next call treats its own digits as
 * the second unit. Returns false for any other conversion failure.
 */
static int decode_unicode_char(JSON_parser jc)
{
    /* a non-zero first slot means a unit from a previous call is still pending */
    const unsigned chars = jc->utf16_decode_buffer[0] ? 2 : 1;
    int i;
    /* slot that receives the unit decoded by this call */
    UTF16 *uc = chars == 1 ? &jc->utf16_decode_buffer[0] : &jc->utf16_decode_buffer[1];
    UTF16 x;
    char* p;

    assert(jc->parse_buffer_count >= 6);

    /* the hex digits are the last four characters of the parse buffer */
    p = &jc->parse_buffer[jc->parse_buffer_count - 4];

    for (i = 0; i < 4; ++i, ++p) {
        x = *p;

        /* map the hex digit character to its value 0..15 */
        if (x >= 'a') {
            x -= ('a' - 10);
        } else if (x >= 'A') {
            x -= ('A' - 10);
        } else {
            x &= ~((UTF16) 0x30);
        }

        assert(x < 16);

        /*
         * first digit lands in the top nibble, last digit in the bottom one;
         * the value is OR-ed in, so the slot is presumably zero on entry --
         * TODO confirm where utf16_decode_buffer[1] is reset
         */
        *uc |= x << ((3u - i) << 2);
    }

    /* clear UTF-16 char form buffer */
    jc->parse_buffer_count -= 6;
    jc->parse_buffer[jc->parse_buffer_count] = 0;

    /* attempt decoding ... */
    {
        /* the UTF-8 output is written where the six characters were removed */
        UTF8* dec_start = (UTF8*)&jc->parse_buffer[jc->parse_buffer_count];
        UTF8* dec_start_dup = dec_start;
        UTF8* dec_end = dec_start + 6;

        const UTF16* enc_start = &jc->utf16_decode_buffer[0];
        const UTF16* enc_end = enc_start + chars;

        const ConversionResult result = ConvertUTF16toUTF8(
            &enc_start, enc_end, &dec_start, dec_end, strictConversion);

        /* number of UTF-8 bytes the converter produced */
        const size_t new_chars = dec_start - dec_start_dup;

        /* was it a surrogate UTF-16 char? */
        if (chars == 1 && result == sourceExhausted) {
            return true;
        }

        if (result != conversionOK) {
            return false;
        }

        /* NOTE: clear decode buffer to resume string reading,
           otherwise we continue to read UTF-16 */
        jc->utf16_decode_buffer[0] = 0;

        assert(new_chars <= 6);

        jc->parse_buffer_count += new_chars;
        jc->parse_buffer[jc->parse_buffer_count] = 0;
    }

    return true;
}
// Builds the object from UTF-16 text: converts it to UTF-8 into m_str
// (lenient conversion) and keeps a heap-allocated copy of the original in m_pU16.
// Throws TeamstudioException when the conversion fails.
StringData(const std::u16string &u16)
{
    // Convert before allocating: the destructor does not run when a constructor
    // throws, so an allocation made before the throw would be leaked.
    if (conversionOK != ConvertUTF16toUTF8(u16, m_str, lenientConversion))
    {
        throw TeamstudioException("Unable to convert text to UTF-8");
    }

    m_pU16 = new std::u16string(u16);
}
/**
 * Stand-in for the Win32 WideCharToMultiByte() that converts UTF-16 to UTF-8,
 * using ICU's u_strToUTF8() when WITH_ICU is defined and ConvertUTF16toUTF8()
 * otherwise.
 *
 * dwFlags, lpDefaultChar and lpUsedDefaultChar are not referenced. CodePage is
 * only checked in the ICU build.
 *
 * @param lpWideCharStr  UTF-16 input.
 * @param cchWideChar    Number of UTF-16 units to convert, or -1 if the input is
 *                       null-terminated (the terminator is then included).
 * @param lpMultiByteStr Output buffer; not used when cbMultiByte is 0.
 * @param cbMultiByte    Size of the output buffer in bytes, or 0 to query the size.
 * @return Number of bytes written (or required when cbMultiByte is 0); 0 on failure.
 */
int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar,
                        LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar)
{
#if !defined(WITH_ICU)
	int length;
	const WCHAR* sourceStart;
	ConversionResult result;
	BYTE* targetStart;
#else
	char* targetStart;
#endif

	/* If cchWideChar is 0, or negative and not -1, the function fails */

	if ((cchWideChar == 0) || (cchWideChar < -1))
		return 0;

	/* If cchWideChar is -1, the string is null-terminated */

	if (cchWideChar == -1)
	{
		size_t len = _wcslen(lpWideCharStr);

		/* the length plus terminator must still fit in an int */
		if (len >= INT32_MAX)
			return 0;

		cchWideChar = (int)len + 1;
	}

	/*
	 * if cbMultiByte is 0, the function returns the required buffer size
	 * in bytes for lpMultiByteStr and makes no use of the output parameter itself.
	 */
#if defined(WITH_ICU)
	{
		UErrorCode error;
		int32_t targetLength;
		int32_t targetCapacity;

		/* only the ANSI and UTF-8 code pages are accepted in the ICU build */
		switch (CodePage)
		{
			case CP_ACP:
			case CP_UTF8:
				break;

			default:
				WLog_ERR(TAG, "Unsupported encoding %u", CodePage);
				return 0;
		}

		targetStart = lpMultiByteStr;
		targetCapacity = cbMultiByte;
		error = U_ZERO_ERROR;

		if (cbMultiByte == 0)
		{
			/* size query: the reported length is returned without inspecting error */
			u_strToUTF8(NULL, 0, &targetLength, lpWideCharStr, cchWideChar, &error);
			cbMultiByte = targetLength;
		}
		else
		{
			u_strToUTF8(targetStart, targetCapacity, &targetLength, lpWideCharStr, cchWideChar, &error);
			cbMultiByte = U_SUCCESS(error) ? targetLength : 0;
		}
	}
#else

	if (cbMultiByte == 0)
	{
		/* size query: a NULL target is passed and the cursor advance is the byte count */
		sourceStart = (WCHAR*) lpWideCharStr;
		targetStart = (BYTE*) NULL;
		result = ConvertUTF16toUTF8(&sourceStart, &sourceStart[cchWideChar],
		                            &targetStart, NULL, strictConversion);
		length = targetStart - ((BYTE*) NULL);
	}
	else
	{
		sourceStart = (WCHAR*) lpWideCharStr;
		targetStart = (BYTE*) lpMultiByteStr;
		result = ConvertUTF16toUTF8(&sourceStart, &sourceStart[cchWideChar],
		                            &targetStart, &targetStart[cbMultiByte], strictConversion);
		length = targetStart - ((BYTE*) lpMultiByteStr);
	}

	/* any conversion failure is reported as 0 rather than as a partial length */
	cbMultiByte = (result == conversionOK) ? length : 0;
#endif
	return cbMultiByte;
}
/* * If len>=0, wp is an array of <len> wide characters without a * termination character. * If len==-1, wp is a null-terminated wide string */ static SV * _dosvwv(SV * sv, UTF16 * wp, STRLEN len, new_cat_set_t mode) { char * p=NULL; STRLEN svlen; #ifdef WIN32 int bytes; bytes=WideCharToMultiByte(CP_UTF8,0,wp,len,NULL,0,NULL,NULL); Newz(0,p,1+bytes,char); /* allocate bytes+1 chars - ptr to p */ if (bytes!=0) { if(!WideCharToMultiByte(CP_UTF8,0,wp,len,p,bytes,NULL,NULL)) { int err=GetLastError(); switch (err) { case ERROR_INSUFFICIENT_BUFFER: croak("_dosvwv: WideCharToMultiByte() failed: insufficient buffer"); case ERROR_INVALID_FLAGS: croak("_dosvwv: WideCharToMultiByte() failed: invalid flags"); case ERROR_INVALID_PARAMETER: croak("_dosvwv: WideCharToMultiByte() failed: invalid parameter"); default: croak("_dosvwv: WideCharToMultiByte() failed: error code %i",err); } } } svlen=(len==-1 ? strlen(p) : bytes); #else unsigned int bytes; if (len == -1) { len = utf16_len(wp); } if (len > 0) { ConversionResult ret; UTF16 *source_start = wp; UTF16 *source_end = source_start + len; UTF8 *target_start; UTF8 *target_end; /* Test conversion and find size UTF* of buffer we need */ ret = ConvertUTF16toUTF8((const UTF16 **)&source_start, source_end, NULL, NULL, strictConversion, &bytes); /*printf("Bytes Required = %d\n", bytes);*/ if (ret != conversionOK) { if (ret == sourceExhausted) { croak("_dosvwc: Partial character in input"); } else if (ret == targetExhausted) { croak("_dosvwc: target buffer exhausted"); } else if (ret == sourceIllegal) { croak("_dosvwc: malformed/illegal source sequence"); } else { croak("_dosvwc: unknown ConvertUTF16toUTF8 error"); } } Newz(0, p, bytes + 1, char); /* convert UTF16 to UTF8 */ target_start = p; target_end = p + bytes; source_start = (UTF16 *)wp; source_end = source_start + len; ret = ConvertUTF16toUTF8((const UTF16 **)&source_start, source_end, &target_start, target_end, strictConversion, &bytes); /*fprintf(stderr, "%s\n", p);*/ if (ret != 
conversionOK) { croak("_dosvwc: second call to ConvertUTF16toUTF8 failed (%d)", ret); } svlen = bytes; } else {
int MDFNConsole::Event(const SDL_Event *event) { switch(event->type) { case SDL_KEYDOWN: if(event->key.keysym.mod & KMOD_ALT) break; switch(event->key.keysym.sym) { case SDLK_HOME: kb_cursor_pos = 0; break; case SDLK_END: kb_cursor_pos = kb_buffer.size(); break; case SDLK_LEFT: if(kb_cursor_pos) kb_cursor_pos--; break; case SDLK_RIGHT: if(kb_cursor_pos < kb_buffer.size()) kb_cursor_pos++; break; case SDLK_UP: Scroll(1); break; case SDLK_DOWN: Scroll(-1); break; case SDLK_RETURN: { std::string concat_str; for(unsigned int i = 0; i < kb_buffer.size(); i++) concat_str += kb_buffer[i]; TextHook((UTF8*)strdup(concat_str.c_str())); kb_buffer.clear(); kb_cursor_pos = 0; } break; case SDLK_BACKSPACE: if(kb_buffer.size() && kb_cursor_pos) { kb_buffer.erase(kb_buffer.begin() + kb_cursor_pos - 1, kb_buffer.begin() + kb_cursor_pos); kb_cursor_pos--; } break; case SDLK_DELETE: if(kb_buffer.size() && kb_cursor_pos < kb_buffer.size()) { kb_buffer.erase(kb_buffer.begin() + kb_cursor_pos, kb_buffer.begin() + kb_cursor_pos + 1); } break; default: if(event->key.keysym.unicode >= 0x20) { uint8 utf8_buffer[8]; UTF8 *dest_ptr = utf8_buffer; memset(utf8_buffer, 0, sizeof(utf8_buffer)); const UTF16 *start_utf16 = &event->key.keysym.unicode; ConvertUTF16toUTF8(&start_utf16, (UTF16 *)&event->key.keysym.unicode + 1, &dest_ptr, &utf8_buffer[8], lenientConversion); kb_buffer.insert(kb_buffer.begin() + kb_cursor_pos, std::string((char *)utf8_buffer)); kb_cursor_pos++; } break; } break; } return(1); }