// Try converting a C-string from different encodings until a possible match is found. // This tries the following encoding converters (in the same order): // utf8, system, default and iso8859-1 to iso8859-15 wxFontEncoding DetectEncodingAndConvert(const char* strIn, wxString& strOut, wxFontEncoding possibleEncoding) { wxFontEncoding encoding = possibleEncoding; strOut.Clear(); if (platform::unicode) { if (possibleEncoding != wxFONTENCODING_UTF16 && possibleEncoding != wxFONTENCODING_UTF16LE && possibleEncoding != wxFONTENCODING_UTF16BE && possibleEncoding != wxFONTENCODING_UTF32 && possibleEncoding != wxFONTENCODING_UTF32LE && possibleEncoding != wxFONTENCODING_UTF32BE) { // crashes deep in the runtime (windows, at least) // if one of the above encodings, hence the guard wxCSConv conv(possibleEncoding); strOut = wxString(strIn, conv); if (strOut.Length() == 0) { // oops! wrong encoding... // try utf8 first, if that was not what was asked for if (possibleEncoding != wxFONTENCODING_UTF8) { encoding = wxFONTENCODING_UTF8; strOut = wxString(strIn, wxConvUTF8); } // check again: if still not right, try system encoding, default encoding and then iso8859-1 to iso8859-15 if (strOut.Length() == 0) { for (int i = wxFONTENCODING_SYSTEM; i < wxFONTENCODING_ISO8859_MAX; ++i) { encoding = (wxFontEncoding)i; if (encoding == possibleEncoding) continue; // skip if same as what was asked wxCSConv csconv(encoding); strOut = wxString(strIn, csconv); if (strOut.Length() != 0) break; // got it! } } } } else { strOut = (const wxChar*) strIn; } } else { strOut = (const wxChar*) strIn; } return encoding; }
bool EncodingDetector::ConvertToWxString(const wxByte* buffer, size_t size) { LogManager* logmgr = Manager::Get()->GetLogManager(); wxString logmsg; if (!buffer || size == 0) { if (m_UseLog) { logmsg.Printf(_T("Encoding conversion has failed (buffer is empty)!")); logmgr->DebugLog(logmsg); } return false; // Nothing we can do... } if (m_BOMSizeInBytes > 0) { for (int i = 0; i < m_BOMSizeInBytes; ++i) buffer++; } size_t outlen = 0; /* NOTE (Biplab#5#): FileManager returns a buffer with 4 extra NULL chars appended. But the buffer size is returned sans the NULL chars */ wxWCharBuffer wideBuff; // if possible use the special conversion-routines, they are much faster than wxCSCov (at least on linux) if ( m_Encoding == wxFONTENCODING_UTF7 ) { wxMBConvUTF7 conv; wideBuff = conv.cMB2WC((const char*)buffer, size + 4 - m_BOMSizeInBytes, &outlen); } else if ( m_Encoding == wxFONTENCODING_UTF8 ) { wxMBConvUTF8 conv; wideBuff = conv.cMB2WC((const char*)buffer, size + 4 - m_BOMSizeInBytes, &outlen); } else if ( m_Encoding == wxFONTENCODING_UTF16BE ) { wxMBConvUTF16BE conv; wideBuff = conv.cMB2WC((const char*)buffer, size + 4 - m_BOMSizeInBytes, &outlen); } else if ( m_Encoding == wxFONTENCODING_UTF16LE ) { wxMBConvUTF16LE conv; wideBuff = conv.cMB2WC((const char*)buffer, size + 4 - m_BOMSizeInBytes, &outlen); } else if ( m_Encoding == wxFONTENCODING_UTF32BE ) { wxMBConvUTF32BE conv; wideBuff = conv.cMB2WC((const char*)buffer, size + 4 - m_BOMSizeInBytes, &outlen); } else if ( m_Encoding == wxFONTENCODING_UTF32LE ) { wxMBConvUTF32LE conv; wideBuff = conv.cMB2WC((const char*)buffer, size + 4 - m_BOMSizeInBytes, &outlen); } else { // try wxEncodingConverter first, even it it only works for // wxFONTENCODING_ISO8859_1..15, wxFONTENCODING_CP1250..1257 and wxFONTENCODING_KOI8 // but it's much, much faster than wxCSConv (at least on Linux) wxEncodingConverter conv; wchar_t* tmp = new wchar_t[size + 4 - m_BOMSizeInBytes]; if ( conv.Init(m_Encoding, wxFONTENCODING_UNICODE) && conv.Convert((const char*)buffer, tmp) ) { wideBuff = tmp; outlen = size + 4 - m_BOMSizeInBytes; // should be correct, because Convert has returned true if (m_UseLog && outlen>0) { logmsg.Printf(_T("Conversion succeeded using wxEncodingConverter " "(buffer size = %lu, converted size = %lu."), static_cast<unsigned long>(size), static_cast<unsigned long>(outlen)); logmgr->DebugLog(logmsg); } } else { // try wxCSConv, if nothing else works wxCSConv csconv(m_Encoding); if (csconv.IsOk()) { wideBuff = csconv.cMB2WC((const char*)buffer, size + 4 - m_BOMSizeInBytes, &outlen); if (m_UseLog && outlen>0) { logmsg.Printf(_T("Conversion succeeded using wxCSConv " "(buffer size = %lu, converted size = %lu."), static_cast<unsigned long>(size), static_cast<unsigned long>(outlen)); logmgr->DebugLog(logmsg); } } } delete [] tmp; } if (outlen>0) { m_ConvStr = wxString(wideBuff); return true; // Done. } // Here, outlen == 0, so an error occurred during conversion. if (m_UseLog) { logmsg.Printf(_T("Encoding conversion using settings has failed!\n" "Encoding chosen was: %s (ID: %d)"), wxFontMapper::Get()->GetEncodingDescription(m_Encoding).wx_str(), m_Encoding); logmgr->DebugLog(logmsg); } // Try system locale as fall-back (if requested by the settings) ConfigManager* cfgMgr = Manager::Get()->GetConfigManager(_T("editor")); if (cfgMgr->ReadBool(_T("/default_encoding/use_system"), true)) { if (platform::windows) { if (m_UseLog) logmgr->DebugLog(_T("Trying system locale as fallback...")); m_Encoding = wxLocale::GetSystemEncoding(); } else { // We can rely on the UTF-8 detection code ;-) if (m_UseLog) logmgr->DebugLog(_T("Trying ISO-8859-1 as fallback...")); m_Encoding = wxFONTENCODING_ISO8859_1; } wxCSConv conv_system(m_Encoding); wideBuff = conv_system.cMB2WC((const char*)buffer, size + 4 - m_BOMSizeInBytes, &outlen); m_ConvStr = wxString(wideBuff); if (outlen == 0) { if (m_UseLog) { logmsg.Printf(_T("Encoding conversion using system locale fallback has failed!\n" "Last encoding choosen was: %s (ID: %d)\n" "Don't know what to do."), wxFontMapper::Get()->GetEncodingDescription(m_Encoding).c_str(), m_Encoding); logmgr->DebugLog(logmsg); } return false; // Nothing we can do... } } else { if (m_UseLog) { logmgr->DebugLog(_T("Encoding conversion has seriously failed!\n" "Don't know what to do.")); } return false; // Nothing we can do... } return true; }
bool FileManager::WriteWxStringToFile(wxFile& f, const wxString& data, wxFontEncoding encoding, bool bom) { const char* mark = nullptr; size_t mark_length = 0; if (bom) { switch (encoding) { case wxFONTENCODING_UTF8: mark = "\xEF\xBB\xBF"; mark_length = 3; break; case wxFONTENCODING_UTF16BE: mark = "\xFE\xFF"; mark_length = 2; break; case wxFONTENCODING_UTF16LE: mark = "\xFF\xFE"; mark_length = 2; break; case wxFONTENCODING_UTF32BE: mark = "\x00\x00\xFE\xFF"; mark_length = 4; break; case wxFONTENCODING_UTF32LE: mark = "\xFF\xFE\x00\x00"; mark_length = 4; break; case wxFONTENCODING_SYSTEM: default: break; } if (f.Write(mark, mark_length) != mark_length) return false; } if (data.length() == 0) return true; #if defined(UNICODE) || defined(_UNICODE) size_t inlen = data.Len(), outlen = 0; wxCharBuffer mbBuff; if ( encoding == wxFONTENCODING_UTF7 ) { wxMBConvUTF7 conv; mbBuff = conv.cWC2MB(data.c_str(), inlen, &outlen); } else if ( encoding == wxFONTENCODING_UTF8 ) { wxMBConvUTF8 conv; mbBuff = conv.cWC2MB(data.c_str(), inlen, &outlen); } else if ( encoding == wxFONTENCODING_UTF16BE ) { wxMBConvUTF16BE conv; mbBuff = conv.cWC2MB(data.c_str(), inlen, &outlen); } else if ( encoding == wxFONTENCODING_UTF16LE ) { wxMBConvUTF16LE conv; mbBuff = conv.cWC2MB(data.c_str(), inlen, &outlen); } else if ( encoding == wxFONTENCODING_UTF32BE ) { wxMBConvUTF32BE conv; mbBuff = conv.cWC2MB(data.c_str(), inlen, &outlen); } else if ( encoding == wxFONTENCODING_UTF32LE ) { wxMBConvUTF32LE conv; mbBuff = conv.cWC2MB(data.c_str(), inlen, &outlen); } else { // try wxEncodingConverter first, even it it only works for // wxFONTENCODING_ISO8859_1..15, wxFONTENCODING_CP1250..1257 and wxFONTENCODING_KOI8 // but it's much, much faster than wxCSConv (at least on linux) wxEncodingConverter conv; // should be long enough char* tmp = new char[2*inlen]; if (conv.Init(wxFONTENCODING_UNICODE, encoding) && conv.Convert(data.wx_str(), tmp)) { mbBuff = tmp; outlen = strlen(mbBuff); // should be correct, because Convert has returned true } else { // try wxCSConv, if nothing else works wxCSConv csconv(encoding); mbBuff = csconv.cWC2MB(data.c_str(), inlen, &outlen); } delete[] tmp; } // if conversion to chosen encoding succeeded, we write the file to disk if (outlen > 0) return f.Write(mbBuff, outlen) == outlen; // if conversion to chosen encoding does not succeed, we try UTF-8 instead size_t size = 0; wxCSConv conv(encoding); wxCharBuffer buf = data.mb_str(conv); if (!buf || !(size = strlen(buf))) { buf = data.mb_str(wxConvUTF8); if (!buf || !(size = strlen(buf))) { cbMessageBox(_T( "The file could not be saved because it contains characters " "that can neither be represented in your current code page, " "nor be converted to UTF-8.\n" "The latter should actually not be possible.\n\n" "Please check your language/encoding settings and try saving again." ), _("Failure"), wxICON_WARNING | wxOK ); return false; } else { InfoWindow::Display(_("Encoding Changed"), _("The saved document contained characters\n" "which were illegal in the selected encoding.\n\n" "The file's encoding has been changed to UTF-8\n" "to prevent you from losing data."), 8000); } } return f.Write(buf, size) == size; #else // For ANSI builds, dump the char* to file. return f.Write(data.c_str(), data.Length()) == data.Length(); #endif }