// Try converting a C-string from different encodings until a possible match is found.
// This tries the following encoding converters (in the same order):
// utf8, system, default and iso8859-1 to iso8859-15
wxFontEncoding DetectEncodingAndConvert(const char* strIn, wxString& strOut, wxFontEncoding possibleEncoding)
{
    wxFontEncoding encoding = possibleEncoding;
    strOut.Clear();

    if (platform::unicode)
    {
        if (possibleEncoding != wxFONTENCODING_UTF16 &&
            possibleEncoding != wxFONTENCODING_UTF16LE &&
            possibleEncoding != wxFONTENCODING_UTF16BE &&
            possibleEncoding != wxFONTENCODING_UTF32 &&
            possibleEncoding != wxFONTENCODING_UTF32LE &&
            possibleEncoding != wxFONTENCODING_UTF32BE)
        {
            // crashes deep in the runtime (windows, at least)
            // if one of the above encodings, hence the guard
            wxCSConv conv(possibleEncoding);
            strOut = wxString(strIn, conv);

            if (strOut.Length() == 0)
            {
                // oops! wrong encoding...

                // try utf8 first, if that was not what was asked for
                if (possibleEncoding != wxFONTENCODING_UTF8)
                {
                    encoding = wxFONTENCODING_UTF8;
                    strOut = wxString(strIn, wxConvUTF8);
                }

                // check again: if still not right, try system encoding, default encoding and then iso8859-1 to iso8859-15
                if (strOut.Length() == 0)
                {
                    for (int i = wxFONTENCODING_SYSTEM; i < wxFONTENCODING_ISO8859_MAX; ++i)
                    {
                        encoding = (wxFontEncoding)i;
                        if (encoding == possibleEncoding)
                            continue; // skip if same as what was asked
                        wxCSConv csconv(encoding);
                        strOut = wxString(strIn, csconv);
                        if (strOut.Length() != 0)
                            break; // got it!
                    }
                }
            }
        }
        else
        {
            strOut = (const wxChar*) strIn;
        }
    }
    else
    {
        strOut = (const wxChar*) strIn;
    }
    return encoding;
}
bool EncodingDetector::ConvertToWxString(const wxByte* buffer, size_t size)
{
    LogManager* logmgr = Manager::Get()->GetLogManager();
    wxString    logmsg;

    if (!buffer || size == 0)
    {
        if (m_UseLog)
        {
            logmsg.Printf(_T("Encoding conversion has failed (buffer is empty)!"));
            logmgr->DebugLog(logmsg);
        }
        return false; // Nothing we can do...
    }

    if (m_BOMSizeInBytes > 0)
    {
        for (int i = 0; i < m_BOMSizeInBytes; ++i)
            buffer++;
    }

    size_t outlen = 0;

    /* NOTE (Biplab#5#): FileManager returns a buffer with 4 extra NULL chars appended.
       But the buffer size is returned sans the NULL chars */

    wxWCharBuffer wideBuff;

    // if possible use the special conversion-routines, they are much faster than wxCSCov (at least on linux)
    if      ( m_Encoding == wxFONTENCODING_UTF7 )
    {
        wxMBConvUTF7 conv;
        wideBuff = conv.cMB2WC((const char*)buffer, size + 4 - m_BOMSizeInBytes, &outlen);
    }
    else if ( m_Encoding == wxFONTENCODING_UTF8 )
    {
        wxMBConvUTF8 conv;
        wideBuff = conv.cMB2WC((const char*)buffer, size + 4 - m_BOMSizeInBytes, &outlen);
    }
    else if ( m_Encoding == wxFONTENCODING_UTF16BE )
    {
        wxMBConvUTF16BE conv;
        wideBuff = conv.cMB2WC((const char*)buffer, size + 4 - m_BOMSizeInBytes, &outlen);
    }
    else if ( m_Encoding == wxFONTENCODING_UTF16LE )
    {
        wxMBConvUTF16LE conv;
        wideBuff = conv.cMB2WC((const char*)buffer, size + 4 - m_BOMSizeInBytes, &outlen);
    }
    else if ( m_Encoding == wxFONTENCODING_UTF32BE )
    {
        wxMBConvUTF32BE conv;
        wideBuff = conv.cMB2WC((const char*)buffer, size + 4 - m_BOMSizeInBytes, &outlen);
    }
    else if ( m_Encoding == wxFONTENCODING_UTF32LE )
    {
        wxMBConvUTF32LE conv;
        wideBuff = conv.cMB2WC((const char*)buffer, size + 4 - m_BOMSizeInBytes, &outlen);
    }
    else
    {
        // try wxEncodingConverter first, even it it only works for
        // wxFONTENCODING_ISO8859_1..15, wxFONTENCODING_CP1250..1257 and wxFONTENCODING_KOI8
        // but it's much, much faster than wxCSConv (at least on Linux)
        wxEncodingConverter conv;
        wchar_t* tmp = new wchar_t[size + 4 - m_BOMSizeInBytes];
        if (  conv.Init(m_Encoding, wxFONTENCODING_UNICODE)
           && conv.Convert((const char*)buffer, tmp) )
        {
            wideBuff = tmp;
            outlen = size + 4 - m_BOMSizeInBytes; // should be correct, because Convert has returned true
            if (m_UseLog && outlen>0)
            {
                logmsg.Printf(_T("Conversion succeeded using wxEncodingConverter "
                                 "(buffer size = %lu, converted size = %lu."), static_cast<unsigned long>(size), static_cast<unsigned long>(outlen));
                logmgr->DebugLog(logmsg);
            }
        }
        else
        {
            // try wxCSConv, if nothing else works
            wxCSConv csconv(m_Encoding);
            if (csconv.IsOk())
            {
                wideBuff = csconv.cMB2WC((const char*)buffer, size + 4 - m_BOMSizeInBytes, &outlen);
                if (m_UseLog && outlen>0)
                {
                    logmsg.Printf(_T("Conversion succeeded using wxCSConv "
                                     "(buffer size = %lu, converted size = %lu."), static_cast<unsigned long>(size), static_cast<unsigned long>(outlen));
                    logmgr->DebugLog(logmsg);
                }
            }
        }
        delete [] tmp;
    }

    if (outlen>0)
    {
        m_ConvStr = wxString(wideBuff);
        return true; // Done.
    }

    // Here, outlen == 0, so an error occurred during conversion.
    if (m_UseLog)
    {
        logmsg.Printf(_T("Encoding conversion using settings has failed!\n"
                         "Encoding chosen was: %s (ID: %d)"),
                      wxFontMapper::Get()->GetEncodingDescription(m_Encoding).wx_str(),
                      m_Encoding);
        logmgr->DebugLog(logmsg);
    }

    // Try system locale as fall-back (if requested by the settings)
    ConfigManager* cfgMgr = Manager::Get()->GetConfigManager(_T("editor"));
    if (cfgMgr->ReadBool(_T("/default_encoding/use_system"), true))
    {
        if (platform::windows)
        {
            if (m_UseLog)
                logmgr->DebugLog(_T("Trying system locale as fallback..."));

            m_Encoding = wxLocale::GetSystemEncoding();
        }
        else
        {
            // We can rely on the UTF-8 detection code ;-)
            if (m_UseLog)
                logmgr->DebugLog(_T("Trying ISO-8859-1 as fallback..."));

            m_Encoding = wxFONTENCODING_ISO8859_1;
        }

        wxCSConv conv_system(m_Encoding);
        wideBuff = conv_system.cMB2WC((const char*)buffer, size + 4 - m_BOMSizeInBytes, &outlen);
        m_ConvStr = wxString(wideBuff);

        if (outlen == 0)
        {
            if (m_UseLog)
            {
                logmsg.Printf(_T("Encoding conversion using system locale fallback has failed!\n"
                                 "Last encoding choosen was: %s (ID: %d)\n"
                                 "Don't know what to do."),
                              wxFontMapper::Get()->GetEncodingDescription(m_Encoding).c_str(),
                              m_Encoding);
                logmgr->DebugLog(logmsg);
            }
            return false; // Nothing we can do...
        }
    }
    else
    {
        if (m_UseLog)
        {
            logmgr->DebugLog(_T("Encoding conversion has seriously failed!\n"
                                "Don't know what to do."));
        }
        return false; // Nothing we can do...
    }

    return true;
}
Example #3
0
bool FileManager::WriteWxStringToFile(wxFile& f, const wxString& data, wxFontEncoding encoding, bool bom)
{
    const char* mark = nullptr;
    size_t mark_length = 0;
    if (bom)
    {
        switch (encoding)
        {
        case wxFONTENCODING_UTF8:
            mark = "\xEF\xBB\xBF";
            mark_length = 3;
            break;
        case wxFONTENCODING_UTF16BE:
            mark = "\xFE\xFF";
            mark_length = 2;
            break;
        case wxFONTENCODING_UTF16LE:
            mark = "\xFF\xFE";
            mark_length = 2;
            break;
        case wxFONTENCODING_UTF32BE:
            mark = "\x00\x00\xFE\xFF";
            mark_length = 4;
            break;
        case wxFONTENCODING_UTF32LE:
            mark = "\xFF\xFE\x00\x00";
            mark_length = 4;
            break;
        case wxFONTENCODING_SYSTEM:
        default:
            break;
        }

        if (f.Write(mark, mark_length) != mark_length)
            return false;
    }

    if (data.length() == 0)
        return true;

#if defined(UNICODE) || defined(_UNICODE)

    size_t inlen = data.Len(), outlen = 0;
    wxCharBuffer mbBuff;
    if ( encoding == wxFONTENCODING_UTF7 )
    {
        wxMBConvUTF7 conv;
        mbBuff = conv.cWC2MB(data.c_str(), inlen, &outlen);
    }
    else if ( encoding == wxFONTENCODING_UTF8 )
    {
        wxMBConvUTF8 conv;
        mbBuff = conv.cWC2MB(data.c_str(), inlen, &outlen);
    }
    else if ( encoding == wxFONTENCODING_UTF16BE )
    {
        wxMBConvUTF16BE conv;
        mbBuff = conv.cWC2MB(data.c_str(), inlen, &outlen);
    }
    else if ( encoding == wxFONTENCODING_UTF16LE )
    {
        wxMBConvUTF16LE conv;
        mbBuff = conv.cWC2MB(data.c_str(), inlen, &outlen);
    }
    else if ( encoding == wxFONTENCODING_UTF32BE )
    {
        wxMBConvUTF32BE conv;
        mbBuff = conv.cWC2MB(data.c_str(), inlen, &outlen);
    }
    else if ( encoding == wxFONTENCODING_UTF32LE )
    {
        wxMBConvUTF32LE conv;
        mbBuff = conv.cWC2MB(data.c_str(), inlen, &outlen);
    }
    else
    {
        // try wxEncodingConverter first, even it it only works for
        // wxFONTENCODING_ISO8859_1..15, wxFONTENCODING_CP1250..1257 and wxFONTENCODING_KOI8
        // but it's much, much faster than wxCSConv (at least on linux)
        wxEncodingConverter conv;
        // should be long enough
        char* tmp = new char[2*inlen];

        if (conv.Init(wxFONTENCODING_UNICODE, encoding) && conv.Convert(data.wx_str(), tmp))
        {
            mbBuff = tmp;
            outlen = strlen(mbBuff); // should be correct, because Convert has returned true
        }
        else
        {
            // try wxCSConv, if nothing else works
            wxCSConv csconv(encoding);
            mbBuff = csconv.cWC2MB(data.c_str(), inlen, &outlen);
        }
        delete[] tmp;
    }
     // if conversion to chosen encoding succeeded, we write the file to disk
    if (outlen > 0)
        return f.Write(mbBuff, outlen) == outlen;

    // if conversion to chosen encoding does not succeed, we try UTF-8 instead
    size_t size = 0;
    wxCSConv conv(encoding);
    wxCharBuffer buf = data.mb_str(conv);

    if (!buf || !(size = strlen(buf)))
    {
        buf = data.mb_str(wxConvUTF8);

        if (!buf || !(size = strlen(buf)))
        {
            cbMessageBox(_T(    "The file could not be saved because it contains characters "
                                "that can neither be represented in your current code page, "
                                "nor be converted to UTF-8.\n"
                                "The latter should actually not be possible.\n\n"
                                "Please check your language/encoding settings and try saving again." ),
                                _("Failure"), wxICON_WARNING | wxOK );
            return false;
        }
        else
        {
            InfoWindow::Display(_("Encoding Changed"),
                                _("The saved document contained characters\n"
                                  "which were illegal in the selected encoding.\n\n"
                                  "The file's encoding has been changed to UTF-8\n"
                                  "to prevent you from losing data."), 8000);
        }
    }

    return f.Write(buf, size) == size;

#else

    // For ANSI builds, dump the char* to file.
    return f.Write(data.c_str(), data.Length()) == data.Length();

#endif
}