/*
 * UTF-8 to UTF-16 conversion exposed with the calling convention of the
 * Win32 MultiByteToWideChar() API and implemented with ConvertUTF8toUTF16().
 *
 * CodePage and dwFlags are accepted for signature compatibility only; the
 * input is always converted as UTF-8 using strictConversion.
 *
 * lpMultiByteStr  source bytes
 * cbMultiByte     number of source bytes, or -1 for a NUL-terminated string
 *                 (the terminator is then converted as well)
 * lpWideCharStr   destination buffer; not used when cchWideChar is 0
 * cchWideChar     destination capacity in WCHARs, or 0 to only measure
 *
 * Returns the number of WCHARs written (or required when cchWideChar is 0),
 * or 0 when the arguments are invalid or the conversion fails.
 */
int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
                        int cbMultiByte, LPWSTR lpWideCharStr, int cchWideChar)
{
	const BYTE* sourceStart;
	LPWSTR targetStart;
	ConversionResult result;
	int length;

	/* A length of 0, or any negative length other than -1, is an error */
	if ((cbMultiByte == 0) || (cbMultiByte < -1))
		return 0;

	/* If cbMultiByte is -1, the string is null-terminated */
	if (cbMultiByte == -1)
		cbMultiByte = (int) strlen((const char*) lpMultiByteStr) + 1;

	sourceStart = (const BYTE*) lpMultiByteStr;

	if (cchWideChar == 0)
	{
		/*
		 * Measuring mode: the converter is handed a NULL target and the
		 * required size is read back from how far it advanced the pointer.
		 * NOTE(review): this relies on this project's ConvertUTF8toUTF16
		 * supporting a NULL target without writing -- confirm.
		 */
		targetStart = (WCHAR*) NULL;
		result = ConvertUTF8toUTF16(&sourceStart, &sourceStart[cbMultiByte],
		                            &targetStart, NULL, strictConversion);
		length = (int) (targetStart - ((WCHAR*) NULL));
	}
	else
	{
		targetStart = lpWideCharStr;
		result = ConvertUTF8toUTF16(&sourceStart, &sourceStart[cbMultiByte],
		                            &targetStart, &targetStart[cchWideChar], strictConversion);
		length = (int) (targetStart - ((WCHAR*) lpWideCharStr));
	}

	/* Report failure instead of the length of a partial conversion */
	return (result == conversionOK) ? length : 0;
}
std::wstring * keymagic_driver::U8toU16(const std::string& u8) { unsigned int length = u8.length(); length++; const UTF8 * source = (UTF8*)u8.c_str(); const UTF8 * sourceStart = source; const UTF8 * sourceEnd = source + length; UTF16 * target = new UTF16[length]; UTF16 * targetStart = target; UTF16 * targetEnd = target + length; ConvertUTF8toUTF16(&sourceStart, sourceEnd, &targetStart, targetEnd, lenientConversion); wchar_t * wcs = new wchar_t[length]; memset(wcs, 0, length * sizeof (wchar_t)); for (int i = 0; i < targetStart - target; i++) { wcs[i] = target[i]; } std::wstring * s = new std::wstring(wcs); delete[] wcs; delete[] target; return s; }
/// Converts the UTF-8 text in \p SrcUTF8 to UTF-16 and stores it in
/// \p DstUTF16, which must be empty on entry.
///
/// \returns true on success (including empty input). On a conversion error
/// \p DstUTF16 is cleared and false is returned.
bool convertUTF8ToUTF16String(StringRef SrcUTF8,
                              SmallVectorImpl<UTF16> &DstUTF16) {
  assert(DstUTF16.empty());

  // Nothing to convert; leave before taking the address of element 0.
  if (SrcUTF8.empty())
    return true;

  const UTF8 *In = reinterpret_cast<const UTF8 *>(SrcUTF8.begin());
  const UTF8 *const InEnd = reinterpret_cast<const UTF8 *>(SrcUTF8.end());

  // UTF-16 never needs more code units than the UTF-8 input has bytes. One
  // extra slot is reserved for a terminator so that DstUTF16.data() can be
  // used as a null-terminated string; the vector is shrunk again below.
  DstUTF16.resize(SrcUTF8.size() + 1);
  UTF16 *const OutBegin = &DstUTF16[0];
  UTF16 *Out = OutBegin;
  UTF16 *const OutEnd = OutBegin + DstUTF16.size();

  const ConversionResult Status =
      ConvertUTF8toUTF16(&In, InEnd, &Out, OutEnd, strictConversion);
  assert(Status != targetExhausted);

  if (Status == conversionOK) {
    DstUTF16.resize(Out - OutBegin);
    // Write a terminator just past the logical end, then drop it from size().
    DstUTF16.push_back(0);
    DstUTF16.pop_back();
    return true;
  }

  DstUTF16.clear();
  return false;
}
//------------------------------------------------------------------------------ // UTF8ToString16 //------------------------------------------------------------------------------ bool UTF8ToString16(const char *in, int len, std::string16 *out16) { assert(in); assert(len >= 0); assert(out16); if (len <= 0) { *out16 = STRING16(L""); return true; } const UTF8 *source_ptr = reinterpret_cast<const UTF8*>(in); const UTF8 *source_end_ptr = source_ptr + len; // should point 'beyond last' // UTF16 string has at most as many 'characters' as UTF8 one. out16->resize(len); UTF16 *target_ptr = reinterpret_cast<UTF16*>(&(*out16)[0]); UTF16 *target_ptr_original = target_ptr; UTF16 *target_end_ptr = target_ptr + len; ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr, &target_ptr, target_end_ptr, strictConversion); // Resize to be the size of the # of converted characters. // Note that stl strings always account for \0 end-of-line character // automatically, so no need to do "+1" here. out16->resize(result == conversionOK ? target_ptr - target_ptr_original : 0); return result == conversionOK; }
inline int utf8_wchar(const std::string &utf8, std::wstring &wide) { // allocate space for worst-case wide.resize(utf8.size()); wchar_t const* dst_start = wide.c_str(); char const* src_start = utf8.c_str(); ConversionResult ret; if (sizeof(wchar_t) == sizeof(UTF32)) { ret = ConvertUTF8toUTF32((const UTF8**)&src_start, (const UTF8*)src_start + utf8.size(), (UTF32**)&dst_start, (UTF32*)dst_start + wide.size() , lenientConversion); wide.resize(dst_start - wide.c_str()); return ret; } else if (sizeof(wchar_t) == sizeof(UTF16)) { ret = ConvertUTF8toUTF16((const UTF8**)&src_start, (const UTF8*)src_start + utf8.size(), (UTF16**)&dst_start, (UTF16*)dst_start + wide.size() , lenientConversion); wide.resize(dst_start - wide.c_str()); return ret; } else { return sourceIllegal; } }
// Converts the UTF-8 string `utf8` into `wide` using lenient conversion.
// wchar_t is treated as UTF-32 or UTF-16 depending on its size.
//
// Returns the converter's result cast to utf8_conv_result_t, or
// source_illegal when wchar_t has neither size. `wide` is resized to the
// number of code units produced, also when the conversion stops early.
utf8_conv_result_t utf8_wchar(const std::string &utf8, std::wstring &wide)
{
	// allocate space for worst-case: never more code units than input bytes
	wide.resize(utf8.size());

	// The output must be written through a mutable pointer; the array
	// returned by c_str() may not be modified. For an empty string a local
	// dummy supplies a valid address with zero capacity.
	wchar_t unused = 0;
	wchar_t* const dst_begin = wide.empty() ? &unused : &wide[0];
	wchar_t* dst_cursor = dst_begin;
	char const* src_cursor = utf8.c_str();
	ConversionResult ret;

	// TODO: 3 refactor this to use wchar_t as a template
	// it would cause less code to be generated without
	// relying on dead-code elimination and fix msvc constant
	// expression warning
	if (sizeof(wchar_t) == sizeof(UTF32))
	{
		const UTF8* const src_end = (const UTF8*)src_cursor + utf8.size();
		UTF32* const dst_end = (UTF32*)dst_cursor + wide.size();
		ret = ConvertUTF8toUTF32((const UTF8**)&src_cursor, src_end,
			(UTF32**)&dst_cursor, dst_end, lenientConversion);
		wide.resize(dst_cursor - dst_begin);
		return (utf8_conv_result_t)ret;
	}
	else if (sizeof(wchar_t) == sizeof(UTF16))
	{
		const UTF8* const src_end = (const UTF8*)src_cursor + utf8.size();
		UTF16* const dst_end = (UTF16*)dst_cursor + wide.size();
		ret = ConvertUTF8toUTF16((const UTF8**)&src_cursor, src_end,
			(UTF16**)&dst_cursor, dst_end, lenientConversion);
		wide.resize(dst_cursor - dst_begin);
		return (utf8_conv_result_t)ret;
	}
	else
	{
		return source_illegal;
	}
}
// Returns the UTF-16 form of m_str. The string is created and converted
// (leniently) on the first call and the same object is returned afterwards.
std::u16string &u16str()
{
    if (m_pU16 != NULL) {
        return *m_pU16;
    }

    m_pU16 = new std::u16string();
    ConvertUTF8toUTF16(m_str, *m_pU16, lenientConversion);
    return *m_pU16;
}
std::wstring UTF8toWS(const AnsiString& s){ std::wstring Value; if (s.size()==0) { return Value; } Value.resize(s.size()); //UTF16* buf = new UTF16[s.size()+1]; const UTF8* Start = (const UTF8*)s.c_str(); const UTF8* End = Start + s.size(); #ifdef WIN32 UTF16* DestStart = (UTF16*)(&Value[0]); UTF16* DestEnd = DestStart + s.size(); ConversionResult ret = ConvertUTF8toUTF16(&Start,End, &DestStart, DestEnd, strictConversion); #else UTF32* DestStart = (UTF32*)(&Value[0]); UTF32* DestEnd = DestStart + s.size(); ConversionResult ret = ConvertUTF8toUTF32(&Start,End, &DestStart, DestEnd, strictConversion); #endif if (ret != conversionOK) { if(ret == sourceIllegal) // { int n = mbstowcs(NULL,s.c_str(),0); Value.resize(n); setlocale(LC_ALL,""); mbstowcs((wchar_t*)Value.c_str(),s.c_str(),n); return Value; }else{ //throw std::exception("UFT8 Convert Fail."); 这种工具函数扔出例外似乎只会对宿主程序构成骚扰 //改为给出提示+可能的乱码 Value = _T("UFT8TO16 Convert Fail:"); int n = mbstowcs(NULL,s.c_str(),0); tstring s1(n,0); setlocale(LC_ALL,""); mbstowcs((wchar_t*)s1.c_str(),s.c_str(),n); Value.insert(Value.end(),s1.begin(),s1.end()); return Value; } } /* else{ Value = (wchar_t*)buf; delete buf; } */ tstring::size_type n = Value.find_first_of(_T('\0')); Value = Value.substr(0,n); return Value; }
// Converts a UTF-8 text file to UTF-16 ("UNICODE").
//
// The whole of `fileName` is read into memory, converted, and written to a
// new temporary file whose name is obtained from TempFileName(fileName).
// Bytes that are not valid UTF-8 are passed through as single 16-bit units.
//
// Returns the temporary file's name.
// Throws ErrCannotOpenFile, ErrNotEnoughMemory, ErrCannotReadFile or
// ErrCannotConvertFile; buffers and handles acquired here are released
// before throwing.
LPWSTR ConvertFileToUnicode(LPCWSTR fileName)
{
	// open file & get file size
	HANDLE hFile = ::MakeSISOpenFile(fileName, GENERIC_READ, OPEN_EXISTING);
	if (hFile == INVALID_HANDLE_VALUE)
		throw ErrCannotOpenFile;
	const DWORD fileSize = ::GetFileSize(hFile, NULL);

	// read in whole file
	LPSTR pNarrowBuf = new CHAR[fileSize];
	if (!pNarrowBuf)
	{
		::CloseHandle(hFile);
		throw ErrNotEnoughMemory;
	}
	DWORD bytesRead = 0;
	BOOL ok = ::ReadFile(hFile, (LPVOID)pNarrowBuf, fileSize, &bytesRead, NULL);
	::CloseHandle(hFile);
	if (!ok)
	{
		delete [] pNarrowBuf;
		throw ErrCannotReadFile;
	}

	// write to new temporary file
	// NOTE(review): ownership of the name returned by TempFileName() is not
	// visible here, so it is not released on the error paths below.
	LPWSTR pszTempSource = TempFileName(fileName);
	hFile = ::MakeSISOpenFile(pszTempSource, GENERIC_WRITE|GENERIC_READ, CREATE_NEW);
	if (hFile == INVALID_HANDLE_VALUE)
	{
		delete [] pNarrowBuf;
		throw ErrCannotOpenFile;
	}

	uint16_t* pBufU = new uint16_t[fileSize];
	if (!pBufU)
	{
		::CloseHandle(hFile);
		delete [] pNarrowBuf;
		throw ErrNotEnoughMemory;
	}

	// convert text to unicode; only the bytes actually read are converted
	const UTF8* sourceStart = (UTF8*)pNarrowBuf;
	const UTF8* sourceEnd = sourceStart + bytesRead;
	DWORD dwNumBytes = 0;

	while (ok && sourceStart < sourceEnd)
	{
		UTF16* targetStart = (UTF16*)pBufU;
		UTF16* targetEnd = (UTF16*)(pBufU + fileSize);
		int ret = ConvertUTF8toUTF16(&sourceStart, sourceEnd, &targetStart, targetEnd, lenientConversion);

		// The converter stops in front of a byte it cannot handle. That covers
		// illegal input and also a multi-byte sequence cut off by the end of
		// the file, which does not advance the source pointer (presumably the
		// standard ConvertUTF behaviour -- confirm). Pass one byte through so
		// that every iteration makes progress.
		if (ret != conversionOK && sourceStart < sourceEnd && targetStart < targetEnd)
		{
			*targetStart++ = *sourceStart++;
		}

		ok = ::WriteFile(hFile, (LPVOID)pBufU, ((char*)targetStart - (char*)pBufU), &dwNumBytes, NULL);
	}

	::CloseHandle(hFile);
	delete [] pNarrowBuf;
	delete [] pBufU;
	if (!ok)
		throw ErrCannotConvertFile;
	return pszTempSource;
}
// Converts `utf8` to a zero-terminated UTF-16 string allocated from `pool`.
// The conversion is lenient and asserted to succeed. The returned
// CYUTF16String is built from the buffer and the number of code units,
// not counting the terminator.
CYUTF16String CYPoolUTF16String(CYPool &pool, CYUTF8String utf8) {
    // UTF-16 never needs more code units than the UTF-8 input has bytes
    // (1-3 byte sequences become one unit, 4-byte sequences become two).
    // One extra unit holds the terminator, so empty input is also in bounds.
    size_t size(utf8.size + 1);
    uint16_t *temp(new (pool) uint16_t[size]);

    const uint8_t *lhs(reinterpret_cast<const uint8_t *>(utf8.data));
    uint16_t *rhs(temp);
    // NOTE(review): the conversion runs inside _assert, as before; this
    // presumes _assert always evaluates its argument -- confirm.
    _assert(ConvertUTF8toUTF16(&lhs, lhs + utf8.size, &rhs, temp + utf8.size, lenientConversion) == conversionOK);

    *rhs = 0;
    return CYUTF16String(temp, rhs - temp);
}
// Converts a UTF-8 string to a wide string using strict conversion.
// wchar_t is treated as UTF-16 when it is 2 bytes and UTF-32 when 4 bytes.
//
// Returns the converted string, sized to the number of code units produced.
// Throws Exception(ERROR_UTF8_2_WIDE) when the input is not valid UTF-8 or
// wchar_t has an unsupported size.
WideString StringUtils::utf8String2WideString( const String& utf8String )
{
    const size_t widesize = utf8String.length();
    WideString returnWideString;

    if ( sizeof( wchar_t ) == 2 )
    {
        returnWideString.resize( widesize + 1, L'\0' );
        const UTF8* sourcestart = reinterpret_cast<const UTF8*>( utf8String.c_str() );
        const UTF8* const sourceend = sourcestart + widesize;
        UTF16* targetstart = reinterpret_cast<UTF16*>( &returnWideString[ 0 ] );
        UTF16* const thisFirstWChar = targetstart;
        UTF16* const targetend = targetstart + widesize;

        const ConversionResult res = ConvertUTF8toUTF16( &sourcestart, sourceend, &targetstart, targetend, strictConversion );
        if ( res != conversionOK )
        {
            throw Exception(Exception::ERROR_UTF8_2_WIDE, String("Could not convert from UTF8 to wide string."));
        }

        // Shrinking is the last access through the raw pointers: the string
        // keeps its own terminator, and pointers taken before resize() must
        // not be written through afterwards.
        returnWideString.resize( targetstart - thisFirstWChar );
    }
    else if ( sizeof( wchar_t ) == 4 )
    {
        returnWideString.resize( widesize + 1, L'\0' );
        const UTF8* sourcestart = reinterpret_cast<const UTF8*>( utf8String.c_str() );
        const UTF8* const sourceend = sourcestart + widesize;
        UTF32* targetstart = reinterpret_cast<UTF32*>( &returnWideString[ 0 ] );
        UTF32* const thisFirstWChar = targetstart;
        UTF32* const targetend = targetstart + widesize;

        const ConversionResult res = ConvertUTF8toUTF32( &sourcestart, sourceend, &targetstart, targetend, strictConversion );
        if ( res != conversionOK )
        {
            throw Exception(Exception::ERROR_UTF8_2_WIDE, String("Could not convert from UTF8 to wide string."));
        }

        returnWideString.resize( targetstart - thisFirstWChar );
    }
    else
    {
        throw Exception(Exception::ERROR_UTF8_2_WIDE, String("Could not convert from UTF8 to wide string."));
    }

    return returnWideString;
}
// Converts `len` bytes of UTF-8 at `utf8str` to a WString (strict mode).
// Only performs a conversion when wchar_t has the size of UTF16; otherwise
// the string of `len` default characters is returned unchanged. The result
// of the converter is not inspected: whatever was produced is returned.
inline WString u2w(const char* utf8str, size_t len)
{
    WString wide;
    wide.resize(len);

    if (sizeof(wchar_t) != sizeof(UTF16))
    {
        return wide;
    }

    const UTF8* in = (UTF8*)&utf8str[0];
    const UTF8* const inEnd = in + len;
    UTF16* out = (UTF16*)&wide[0];
    UTF16* const outEnd = out + len;

    (void)ConvertUTF8toUTF16(&in, inEnd, &out, outEnd, strictConversion);

    // Keep only the code units that were actually written.
    wide.resize(out - (UTF16*)&wide[0]);
    return wide;
}
/*
 * Converts src_len bytes of UTF-8 at src into a newly malloc'ed, zero-filled
 * UTF-16 buffer (strict conversion).
 *
 * *dst      receives the buffer; it stays allocated whatever the conversion
 *           result is, so the caller has to free it.
 * *dst_len  receives the buffer capacity in BYTES, excluding the extra
 *           terminating unit (src_len * sizeof(u16)); it is not the length
 *           of the converted text.
 *
 * Returns -1 if the allocation fails, otherwise the ConversionResult of
 * ConvertUTF8toUTF16().
 */
int str_utf8_to_u16(u16 **dst, u32 *dst_len, u8 *src, u32 src_len)
{
    UTF16 *target_start;
    UTF16 *target_end;
    UTF8 *src_start;
    UTF8 *src_end;

    *dst_len = src_len * sizeof(u16);
    *dst = (u16 *)malloc((*dst_len) + sizeof(u16));
    if (*dst == NULL)
        return -1;
    memset(*dst, 0, (*dst_len) + sizeof(u16));

    target_start = *dst;
    /* The end is counted in UTF16 units, not bytes: src_len units fit, and the
     * extra zeroed unit stays untouched as the terminator. */
    target_end = target_start + src_len;

    src_start = (UTF8 *)src;
    src_end = (UTF8 *)(src + src_len * sizeof(u8));

    return ConvertUTF8toUTF16((const UTF8 **)&src_start, src_end,
                              &target_start, target_end, strictConversion);
}
void writeString(const char* value, int len, FILE* out) { UTF16* utf16String = new UTF16[len]; const UTF8* sourceStart = (UTF8*) value; UTF8* sourceEnd = (UTF8*) value + len; UTF16* targetStart = utf16String; UTF16* targetEnd = utf16String + len; if (ConvertUTF8toUTF16(&sourceStart, sourceEnd, &targetStart, targetEnd, lenientConversion) != conversionOK) { fprintf(stderr, "Unable to interpret argument as UTF8 text\n"); throw ErrBadUTF8; } uint32_t datalen = (intptr_t) ((uint8_t*)targetStart - (uint8_t*)utf16String); writeUint8(datalen, out); for (UTF16* ptr = utf16String; ptr < targetStart; ptr++) writeUint16(*ptr, out); delete [] utf16String; }
bool ConvertUTF8toWide(unsigned WideCharWidth, const std::string& Source, char *&ResultPtr, const UTF8 *&ErrorPtr) { assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4); ConversionResult result = conversionOK; // Copy the character span over. if (WideCharWidth == 1) { const UTF8 *Pos = reinterpret_cast<const UTF8*>(Source.data()); if (!isLegalUTF8String(&Pos, reinterpret_cast<const UTF8*>(Source.data() + Source.length()))) { result = sourceIllegal; ErrorPtr = Pos; } else { memcpy(ResultPtr, Source.data(), Source.size()); ResultPtr += Source.size(); } } else if (WideCharWidth == 2) { const UTF8 *sourceStart = (const UTF8*)Source.data(); // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast. UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr); ConversionFlags flags = strictConversion; result = ConvertUTF8toUTF16( &sourceStart, sourceStart + Source.size(), &targetStart, targetStart + 2*Source.size(), flags); if (result == conversionOK) ResultPtr = reinterpret_cast<char*>(targetStart); else ErrorPtr = sourceStart; } else if (WideCharWidth == 4) { const UTF8 *sourceStart = (const UTF8*)Source.data(); // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast. UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr); ConversionFlags flags = strictConversion; result = ConvertUTF8toUTF32( &sourceStart, sourceStart + Source.size(), &targetStart, targetStart + 4*Source.size(), flags); if (result == conversionOK) ResultPtr = reinterpret_cast<char*>(targetStart); else ErrorPtr = sourceStart; } assert((result != targetExhausted) && "ConvertUTF8toUTFXX exhausted target buffer"); return result == conversionOK; }
std::wstring StringUtils::Utf8_To_wstring(const std::string& utf8string) { if (utf8string.length()==0) { return std::wstring(); } size_t widesize = utf8string.length(); if (sizeof(wchar_t) == 2) { std::wstring resultstring; resultstring.resize(widesize, L'\0'); const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str()); const UTF8* sourceend = sourcestart + widesize; UTF16* targetstart = reinterpret_cast<UTF16*>(&resultstring[0]); UTF16* targetend = targetstart + widesize; ConversionResult res = ConvertUTF8toUTF16(&sourcestart, sourceend, &targetstart, targetend, strictConversion); if (res != conversionOK) { return std::wstring(utf8string.begin(), utf8string.end()); } *targetstart = 0; return std::wstring(resultstring.c_str()); } else if (sizeof(wchar_t) == 4) { std::wstring resultstring; resultstring.resize(widesize, L'\0'); const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str()); const UTF8* sourceend = sourcestart + widesize; UTF32* targetstart = reinterpret_cast<UTF32*>(&resultstring[0]); UTF32* targetend = targetstart + widesize; ConversionResult res = ConvertUTF8toUTF32(&sourcestart, sourceend, &targetstart, targetend, strictConversion); if (res != conversionOK) { return std::wstring(utf8string.begin(), utf8string.end()); } *targetstart = 0; return std::wstring(resultstring.c_str()); } else { assert(false); } return L""; }
static std::wstring FromUtf8(const std::string& utf8string) { size_t widesize = utf8string.length(); if (sizeof(wchar_t) == 2) { wchar_t* widestringnative = new wchar_t[widesize+1]; const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str()); const UTF8* sourceend = sourcestart + widesize; UTF16* targetstart = reinterpret_cast<UTF16*>(widestringnative); UTF16* targetend = targetstart + widesize+1; ConversionResult res = ConvertUTF8toUTF16(&sourcestart, sourceend, &targetstart, targetend, strictConversion); if (res != conversionOK) { delete [] widestringnative; throw std::exception(); } *targetstart = 0; std::wstring resultstring(widestringnative); delete [] widestringnative; return resultstring; } else if (sizeof(wchar_t) == 4) { wchar_t* widestringnative = new wchar_t[widesize+1]; const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str()); const UTF8* sourceend = sourcestart + widesize; UTF32* targetstart = reinterpret_cast<UTF32*>(widestringnative); UTF32* targetend = targetstart + widesize+1; ConversionResult res = ConvertUTF8toUTF32(&sourcestart, sourceend, &targetstart, targetend, strictConversion); if (res != conversionOK) { delete [] widestringnative; throw std::exception(); } *targetstart = 0; std::wstring resultstring(widestringnative); delete [] widestringnative; return resultstring; } else { throw std::exception(); } return L""; }
// Converts a UTF-8 String to a WString by forwarding its bytes and size to
// the (const char*, size_t) overload of u2w.
WString u2w(const String& utf8str)
{
    return u2w(utf8str.c_str(), utf8str.size());
}
std::wstring FromUtf8(const std::string& utf8string) { size_t widesize = utf8string.length(); if (sizeof(wchar_t) == 2) { std::wstring resultstring; resultstring.resize(widesize+1, L'\0'); const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str()); const UTF8* sourceend = sourcestart + widesize; UTF16* targetstart = reinterpret_cast<UTF16*>(&resultstring[0]); UTF16* targetend = targetstart + widesize; ConversionResult res = ConvertUTF8toUTF16 (&sourcestart, sourceend, &targetstart, targetend, strictConversion); if (res != conversionOK) { throw std::exception("La falla!"); } *targetstart = 0; return resultstring; } else if (sizeof(wchar_t) == 4) { std::wstring resultstring; resultstring.resize(widesize+1, L'\0'); const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str()); const UTF8* sourceend = sourcestart + widesize; UTF32* targetstart = reinterpret_cast<UTF32*>(&resultstring[0]); UTF32* targetend = targetstart + widesize; ConversionResult res = ConvertUTF8toUTF32 (&sourcestart, sourceend, &targetstart, targetend, strictConversion); if (res != conversionOK) { throw std::exception("La falla!"); } *targetstart = 0; return resultstring; } else { throw std::exception("La falla!"); } return L""; }
// Converts `length` bytes of UTF-8 at `str` into `outBuffer` (strict mode).
// wchar_t is treated as UTF-16 when 2 bytes wide and as UTF-32 otherwise.
//
// Returns the number of code units written, or 0 when `length` is 0 or the
// conversion fails. `outBuffer` is cleared first; its length is only set
// when the conversion succeeds.
intp StringParser::ConvertToBuffer(const char* str, size_t length, WHeapString& outBuffer)
{
    outBuffer.Clear();
    if (length == 0)
    {
        return 0;
    }

    // One unit per input byte at most, plus the terminator.
    outBuffer.ReserveSize(length + 1);

    const UTF8* in = reinterpret_cast<const UTF8*>(str);
    const UTF8* const inEnd = in + length;

    constexpr bool isUTF16 = sizeof(wchar_t) == 2;
    if (isUTF16)
    {
        UTF16* out = reinterpret_cast<UTF16*>(outBuffer.MutableBuffer());
        UTF16* const outEnd = out + length;
        const ConversionResult status = ConvertUTF8toUTF16(&in, inEnd, &out, outEnd, strictConversion);
        *out = 0; // terminate whatever was produced
        if (status != conversionOK)
        {
            return 0;
        }
        const intp count = out - reinterpret_cast<UTF16*>(outBuffer.MutableBuffer());
        outBuffer.ForceSetLength(count);
        return count;
    }

    UTF32* out = reinterpret_cast<UTF32*>(outBuffer.MutableBuffer());
    UTF32* const outEnd = out + length;
    const ConversionResult status = ConvertUTF8toUTF32(&in, inEnd, &out, outEnd, strictConversion);
    *out = 0; // terminate whatever was produced
    if (status != conversionOK)
    {
        return 0;
    }
    const intp count = out - reinterpret_cast<UTF32*>(outBuffer.MutableBuffer());
    outBuffer.ForceSetLength(count);
    return count;
}
// Converts the UTF-8 range [*src_start, src_end) into `wide`, which the
// caller has already sized to the capacity available for output. The
// conversion is lenient and wchar_t storage is used as UTF-16.
//
// When the converter reports sourceIllegal, `wide` is replaced by the bytes
// from the current *src_start up to src_end, each widened individually
// (i.e. treated as Latin-1). Otherwise `wide` is shrunk to the number of
// code units written. The converter's result is returned in both cases.
static utf8_errors::error_code_enum convert(char const** src_start
	, char const* src_end
	, std::wstring& wide)
{
	wchar_t* out = &wide[0];
	int const status = ConvertUTF8toUTF16(
		reinterpret_cast<UTF8 const**>(src_start)
		, reinterpret_cast<UTF8 const*>(src_end)
		, reinterpret_cast<UTF16**>(&out)
		, reinterpret_cast<UTF16*>(out + wide.size())
		, lenientConversion);

	if (status != sourceIllegal)
	{
		wide.resize(aux::numeric_cast<std::size_t>(out - wide.c_str()));
	}
	else
	{
		// assume Latin-1
		wide.clear();
		std::uint8_t const* const first = reinterpret_cast<std::uint8_t const*>(*src_start);
		std::uint8_t const* const last = reinterpret_cast<std::uint8_t const*>(src_end);
		std::copy(first, last, std::back_inserter(wide));
	}
	return static_cast<utf8_errors::error_code_enum>(status);
}
// Converts UTF8 to UTF16 uint16_t* utf8to16(uint8_t* txt, int &len16) { // Length of UTF8 version of the search string int len = 0; while (txt[len] != 0) { len++; } uint16_t* txt16 = (uint16_t*) malloc((len + 1)*2); uint16_t* txt16End = txt16; ConversionResult res = ConvertUTF8toUTF16((const UTF8**) &txt, (const UTF8*) (txt + len), (UTF16**) &txt16End, (UTF16*) (txt16 + len*2), lenientConversion); if (res != conversionOK) { printf("Failed to convert input to UTF8"); exit(ERR_INTERNAL_ERROR); } // Size of UTF16 version of the search string (in two byte characters) len16 = (int) (txt16End - txt16); // Set trailing zero txt16[len16] = 0; return txt16; };
const bool FromUTF8(const std::vector<std::string::value_type> &utf8string, std::wstring &wcstring) { if(utf8string.size()==0) { wcstring.assign(L""); return true; } std::vector<std::wstring::value_type> dest(utf8string.size(),0); // dest will never be bigger than the input but could be smaller const UTF8 *sourcestart=reinterpret_cast<const UTF8 *>(&utf8string[0]); const UTF8 *sourceend=sourcestart+utf8string.size(); if(sizeof(std::wstring::value_type)==2 && sizeof(UTF16)==2) { UTF16 *deststart=reinterpret_cast<UTF16 *>(&dest[0]); UTF16 *destend=deststart+dest.size(); ConversionResult rval=ConvertUTF8toUTF16(&sourcestart,sourceend,&deststart,destend,lenientConversion); if(rval!=conversionOK) { return false; } wcstring.assign(dest.begin(),dest.end()-(destend-deststart)); } else if(sizeof(std::wstring::value_type)==4 && sizeof(UTF32)==4) { UTF32 *deststart=reinterpret_cast<UTF32 *>(&dest[0]); UTF32 *destend=deststart+dest.size(); ConversionResult rval=ConvertUTF8toUTF32(&sourcestart,sourceend,&deststart,destend,lenientConversion); if(rval!=conversionOK) { return false; } wcstring.assign(dest.begin(),dest.end()-(destend-deststart)); } else { std::vector<UTF32> dest2(utf8string.size(),0); UTF32 *deststart=reinterpret_cast<UTF32 *>(&dest2[0]); UTF32 *destend=deststart+dest2.size(); ConversionResult rval=ConvertUTF8toUTF32(&sourcestart,sourceend,&deststart,destend,lenientConversion); if(rval!=conversionOK) { return false; } wcstring.assign(dest2.begin(),dest2.end()-(destend-deststart)); } return true; }
/*
 * UTF-8 to UTF-16 conversion with the calling convention of the Win32
 * MultiByteToWideChar() API.
 *
 * With WITH_ICU the work is done by u_strFromUTF8() and only CP_ACP and
 * CP_UTF8 are accepted; otherwise ConvertUTF8toUTF16() is used with
 * strictConversion and CodePage is not consulted.
 *
 * cbMultiByte  number of source bytes, or -1 for a NUL-terminated string
 *              (the terminator is then converted too)
 * cchWideChar  destination capacity in WCHARs, or 0 to only compute the
 *              required size without touching lpWideCharStr
 *
 * Returns the number of WCHARs written or required, or 0 on failure.
 */
int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
                        int cbMultiByte, LPWSTR lpWideCharStr, int cchWideChar)
{
	LPWSTR targetStart;
#if !defined(WITH_ICU)
	const BYTE* sourceStart;
	int length;
	ConversionResult result;
#endif

	/* A length of 0, or any negative length other than -1, is an error */
	if ((cbMultiByte < -1) || (cbMultiByte == 0))
		return 0;

	/* -1 means: measure the NUL-terminated input, terminator included */
	if (cbMultiByte == -1)
	{
		size_t len = strlen((const char*) lpMultiByteStr);

		if (len >= INT32_MAX)
			return 0;

		cbMultiByte = (int)len + 1;
	}

#if defined(WITH_ICU)
	{
		UErrorCode error;
		int32_t targetLength;
		int32_t targetCapacity;

		if ((CodePage != CP_ACP) && (CodePage != CP_UTF8))
		{
			WLog_ERR(TAG, "Unsupported encoding %u", CodePage);
			return 0;
		}

		targetStart = lpWideCharStr;
		targetCapacity = cchWideChar;
		error = U_ZERO_ERROR;

		if (cchWideChar != 0)
		{
			u_strFromUTF8(targetStart, targetCapacity, &targetLength,
			              lpMultiByteStr, cbMultiByte, &error);
			cchWideChar = U_SUCCESS(error) ? targetLength : 0;
		}
		else
		{
			/* Size query: no destination, only the length is reported */
			u_strFromUTF8(NULL, 0, &targetLength, lpMultiByteStr, cbMultiByte, &error);
			cchWideChar = targetLength;
		}
	}
#else
	sourceStart = (const BYTE*) lpMultiByteStr;

	if (cchWideChar != 0)
	{
		targetStart = lpWideCharStr;
		result = ConvertUTF8toUTF16(&sourceStart, &sourceStart[cbMultiByte],
		                            &targetStart, &targetStart[cchWideChar], strictConversion);
		length = targetStart - ((WCHAR*) lpWideCharStr);
	}
	else
	{
		/* Size query: the converter is given a NULL target and the count is
		 * taken from how far the target pointer was advanced */
		targetStart = (WCHAR*) NULL;
		result = ConvertUTF8toUTF16(&sourceStart, &sourceStart[cbMultiByte],
		                            &targetStart, NULL, strictConversion);
		length = targetStart - ((WCHAR*) NULL);
	}

	cchWideChar = (result == conversionOK) ? length : 0;
#endif
	return cchWideChar;
}
/*
 * Reloads the database when GEOIP_CHECK_CACHE is set and the modification
 * time of gi->file_path differs from the one recorded in gi->mtime.
 *
 * On a change the file handle is reopened, the memory/mmap cache is
 * refilled, the database segments are set up again and, with
 * GEOIP_INDEX_CACHE, the index cache is re-read.
 *
 * Returns 0 when nothing had to be done or the reload succeeded, -1 when
 * the reload failed (a message is printed to stderr).
 */
static int _check_mtime(GeoIP *gi) {
	struct stat buf;
#ifdef WIN32
	int name_len;
	wchar_t* wfilename;
	wchar_t* dst_start;
	char const* src_start;
#endif

	if (!(gi->flags & GEOIP_CHECK_CACHE))
		return 0;
	if (stat(gi->file_path, &buf) == -1)
		return 0;
	if (buf.st_mtime == gi->mtime)
		return 0;

	/* GeoIP Database file updated */
	if (gi->flags & (GEOIP_MEMORY_CACHE | GEOIP_MMAP_CACHE)) {
#if !defined WIN32 && !defined __OS2__
		if ( gi->flags & GEOIP_MMAP_CACHE) {
			munmap(gi->cache, gi->size);
			gi->cache = NULL;
		} else
#endif
		{
			/* reload database into memory cache */
			size_t new_size = (size_t) buf.st_size;
			unsigned char *new_cache = (unsigned char*) realloc(gi->cache, new_size);
			if (new_cache == NULL) {
				/* A failed realloc() of a non-zero size leaves the old block
				 * allocated; release it instead of losing the pointer. */
				if (new_size != 0)
					free(gi->cache);
				gi->cache = NULL;
				fprintf(stderr,"Out of memory when reloading %s\n",gi->file_path);
				return -1;
			}
			gi->cache = new_cache;
		}
	}

	/* refresh filehandle */
	fclose(gi->GeoIPDatabase);
#ifdef WIN32
	assert(sizeof(wchar_t) == 2);
	gi->GeoIPDatabase = NULL;
	name_len = strlen(gi->file_path);
	wfilename = (wchar_t*) malloc((name_len + 1) * sizeof(wchar_t));
	if (wfilename != NULL) {
		dst_start = wfilename;
		src_start = gi->file_path;
		/* name_len+1 so that the terminating NUL is converted as well; the
		 * file is only opened if the whole name could be converted */
		if (ConvertUTF8toUTF16((const UTF8**)&src_start, (const UTF8*)src_start + name_len+1,
				(UTF16**)&dst_start, (UTF16*)dst_start + name_len + 1,
				lenientConversion) == conversionOK) {
			gi->GeoIPDatabase = _wfopen(wfilename,L"rb");
		}
		free(wfilename);
	}
#else
	gi->GeoIPDatabase = fopen(gi->file_path,"rb");
#endif

	if (gi->GeoIPDatabase == NULL) {
		fprintf(stderr,"Error Opening file %s when reloading\n",gi->file_path);
		return -1;
	}
	gi->mtime = buf.st_mtime;
	gi->size = buf.st_size;

#if !defined WIN32 && !defined __OS2__
	if ( gi->flags & GEOIP_MMAP_CACHE) {
		gi->cache = (unsigned char*)mmap(NULL, buf.st_size, PROT_READ, MAP_PRIVATE, fileno(gi->GeoIPDatabase), 0);
		if ( gi->cache == MAP_FAILED ) {
			fprintf(stderr,"Error remapping file %s when reloading\n",gi->file_path);
			gi->cache = 0;
			return -1;
		}
	} else
#endif
	if ( gi->flags & GEOIP_MEMORY_CACHE ) {
		if (fread(gi->cache, sizeof(unsigned char), buf.st_size, gi->GeoIPDatabase) != (size_t) buf.st_size) {
			fprintf(stderr,"Error reading file %s when reloading\n",gi->file_path);
			return -1;
		}
	}

	if (gi->databaseSegments != NULL) {
		free(gi->databaseSegments);
		gi->databaseSegments = NULL;
	}
	_setup_segments(gi);
	if (gi->databaseSegments == NULL) {
		fprintf(stderr, "Error reading file %s -- corrupt\n", gi->file_path);
		return -1;
	}

	if (gi->flags & GEOIP_INDEX_CACHE) {
		size_t index_size = sizeof(unsigned char) * ((gi->databaseSegments[0] * (long)gi->record_length * 2));
		unsigned char *new_index = (unsigned char *) realloc(gi->index_cache, index_size);
		/* As before, a failed allocation leaves index_cache NULL and the
		 * reload continues; the old block is released rather than leaked. */
		if (new_index == NULL && index_size != 0)
			free(gi->index_cache);
		gi->index_cache = new_index;
		if (gi->index_cache != NULL) {
			fseek(gi->GeoIPDatabase, 0, SEEK_SET);
			if (fread(gi->index_cache, sizeof(unsigned char), index_size, gi->GeoIPDatabase) != index_size) {
				fprintf(stderr,"Error reading file %s where reloading\n",gi->file_path);
				return -1;
			}
		}
	}
	return 0;
}
/*
 * Releases everything GeoIP_open() has acquired for a handle that is not
 * going to be returned: segments, index cache, cache (unmapped or freed
 * depending on how it was obtained), the open file, the path copy and the
 * handle itself. Pointers that were never set must be NULL.
 */
static void _GeoIP_open_abort(GeoIP *gi, int flags) {
	free(gi->databaseSegments);
	free(gi->index_cache);
#if !defined WIN32 && !defined __OS2__
	if (flags & GEOIP_MMAP_CACHE) {
		if (gi->cache != NULL)
			munmap(gi->cache, gi->size);
	} else
#endif
	free(gi->cache);
	fclose(gi->GeoIPDatabase);
	free(gi->file_path);
	free(gi);
}

/*
 * Opens the GeoIP database `filename` and returns a newly allocated handle,
 * or NULL on failure (a message is printed to stderr for I/O errors).
 *
 * flags:
 *   GEOIP_MEMORY_CACHE  read the whole file into memory
 *   GEOIP_MMAP_CACHE    map the file (not on WIN32 / OS/2, where the file is
 *                       read into memory instead)
 *   GEOIP_CHECK_CACHE   record the modification time of the file
 *   GEOIP_INDEX_CACHE   additionally read the index part into memory
 *
 * On WIN32 the file name is converted from UTF-8 to UTF-16 before opening.
 */
GeoIP* GeoIP_open (const char * filename, int flags) {
	struct stat buf;
	GeoIP * gi;
	size_t len;
#ifdef WIN32
	int name_len;
	wchar_t* wfilename;
	wchar_t* dst_start;
	char const* src_start;
#endif

	gi = (GeoIP *)malloc(sizeof(GeoIP));
	if (gi == NULL)
		return NULL;
	len = sizeof(char) * (strlen(filename)+1);
	gi->file_path = (char*)malloc(len);
	if (gi->file_path == NULL) {
		free(gi);
		return NULL;
	}
	strncpy(gi->file_path, filename, len);

#ifdef WIN32
	assert(sizeof(wchar_t) == 2);
	gi->GeoIPDatabase = NULL;
	name_len = strlen(filename);
	wfilename = (wchar_t*) malloc((name_len + 1) * sizeof(wchar_t));
	if (wfilename != NULL) {
		dst_start = wfilename;
		src_start = filename;
		/* name_len+1 so that the terminating NUL is converted as well; the
		 * file is only opened if the whole name could be converted */
		if (ConvertUTF8toUTF16((const UTF8**)&src_start, (const UTF8*)src_start + name_len+1,
				(UTF16**)&dst_start, (UTF16*)dst_start + name_len + 1,
				lenientConversion) == conversionOK) {
			gi->GeoIPDatabase = _wfopen(wfilename,L"rb");
		}
		free(wfilename);
	}
#else
	gi->GeoIPDatabase = fopen(filename,"rb");
#endif

	if (gi->GeoIPDatabase == NULL) {
		fprintf(stderr,"Error Opening file %s\n",filename);
		free(gi->file_path);
		free(gi);
		return NULL;
	}

	/* From here on every failure goes through _GeoIP_open_abort(), so the
	 * pointers it releases must have a defined value. */
	gi->cache = NULL;
	gi->index_cache = NULL;
	gi->databaseSegments = NULL;

	if (flags & (GEOIP_MEMORY_CACHE | GEOIP_MMAP_CACHE) ) {
		if (fstat(_fileno(gi->GeoIPDatabase), &buf) == -1) {
			fprintf(stderr,"Error stating file %s\n",filename);
			_GeoIP_open_abort(gi, flags);
			return NULL;
		}
		gi->mtime = buf.st_mtime;
		gi->size = buf.st_size;
#if !defined WIN32 && !defined __OS2__
		/* MMAP added my Peter Shipley */
		if ( flags & GEOIP_MMAP_CACHE) {
			gi->cache = (unsigned char*)mmap(NULL, buf.st_size, PROT_READ, MAP_PRIVATE, fileno(gi->GeoIPDatabase), 0);
			if ( gi->cache == MAP_FAILED ) {
				fprintf(stderr,"Error mmaping file %s\n",filename);
				gi->cache = NULL; /* nothing was mapped */
				_GeoIP_open_abort(gi, flags);
				return NULL;
			}
		} else
#endif
		{
			gi->cache = (unsigned char *) malloc(sizeof(unsigned char) * buf.st_size);
			/* NOTE(review): as before, a failed allocation is not treated
			 * as an error here and leaves cache NULL. */
			if (gi->cache != NULL) {
				if (fread(gi->cache, sizeof(unsigned char), buf.st_size, gi->GeoIPDatabase) != (size_t) buf.st_size) {
					fprintf(stderr,"Error reading file %s\n",filename);
					_GeoIP_open_abort(gi, flags);
					return NULL;
				}
			}
		}
	} else {
		if (flags & GEOIP_CHECK_CACHE) {
			if (fstat(_fileno(gi->GeoIPDatabase), &buf) == -1) {
				fprintf(stderr,"Error stating file %s\n",filename);
				_GeoIP_open_abort(gi, flags);
				return NULL;
			}
			gi->mtime = buf.st_mtime;
		}
		gi->cache = NULL;
	}

	gi->flags = flags;
	gi->charset = GEOIP_CHARSET_ISO_8859_1;

	_setup_segments(gi);
	/* Same check as in _check_mtime(): no segments means the file could not
	 * be interpreted, and the index code below would dereference NULL. */
	if (gi->databaseSegments == NULL) {
		fprintf(stderr, "Error reading file %s -- corrupt\n", filename);
		_GeoIP_open_abort(gi, flags);
		return NULL;
	}

	if (flags & GEOIP_INDEX_CACHE) {
		gi->index_cache = (unsigned char *) malloc(sizeof(unsigned char) * ((gi->databaseSegments[0] * (long)gi->record_length * 2)));
		if (gi->index_cache != NULL) {
			fseek(gi->GeoIPDatabase, 0, SEEK_SET);
			if (fread(gi->index_cache, sizeof(unsigned char), gi->databaseSegments[0] * (long)gi->record_length * 2, gi->GeoIPDatabase) != (size_t) (gi->databaseSegments[0]*(long)gi->record_length * 2)) {
				fprintf(stderr,"Error reading file %s\n",filename);
				_GeoIP_open_abort(gi, flags);
				return NULL;
			}
		}
	} else {
		gi->index_cache = NULL;
	}
	return gi;
}