/**
 * Returns a UTF-8 version of text, which is assumed to be encoded in charset.
 *
 * The conversion error counter is reset on every call. The text is handed
 * back untouched when it is empty, when charset names UTF-8 (compared
 * case-insensitively), or when charset is empty and the current locale
 * already uses UTF-8. An empty charset otherwise falls back to the locale's
 * charset. The actual conversion is delegated to convert().
 *
 * @param text the text to convert.
 * @param charset the name of the charset text is encoded in; may be empty.
 *        This variant only reads it.
 * @return the original text when no conversion is needed, else whatever
 *         convert() returns.
 */
dstring TextConverter::toUTF8(const dstring &text, string &charset)
{
	string sourceCharset(StringManip::toLowerCase(charset));

	m_conversionErrors = 0;

	// Nothing to do for empty text or text that is already UTF-8
	if (text.empty() || (sourceCharset == "utf-8"))
	{
		return text;
	}

	if (sourceCharset.empty())
	{
		// No charset was given: rely on the locale
		if (m_utf8Locale == true)
		{
			// The current locale uses UTF-8
			return text;
		}

		sourceCharset = m_localeCharset;
	}

	return convert(text, sourceCharset, "UTF-8");
}
/**
 * Returns a UTF-8 version of text, which is assumed to be encoded in charset.
 *
 * The conversion error counter is reset on every call. The text is handed
 * back untouched, and charset is left as the caller passed it, when:
 *  - text is empty or charset names UTF-8 (compared case-insensitively);
 *  - charset is empty and the current locale already uses UTF-8;
 *  - the number of invalid sequences reaches m_maxErrors;
 *  - the converter reports an error other than EILSEQ or E2BIG.
 *
 * Otherwise the input is converted chunk by chunk. Each run of consecutive
 * invalid bytes is replaced with a single "?" and skipped one byte at a time.
 * If setting up the converter throws Error and the charset name contains
 * an underscore, the conversion is retried once with underscores replaced
 * by dashes; any other failure yields an empty result.
 *
 * @param text the text to convert.
 * @param charset in: the name of the charset text is encoded in, may be empty.
 *        out: on the paths that reach the end of the function, the lower-cased
 *        (or locale, or dash-fixed) charset name that was used.
 * @return the converted text, the original text when no conversion was done
 *         or the conversion was abandoned, or an empty string on exception.
 */
dstring TextConverter::toUTF8(const dstring &text, string &charset)
{
	string sourceCharset(StringManip::toLowerCase(charset));
	char chunk[8192];
	char *pRemaining = const_cast<char *>(text.c_str());

	m_conversionErrors = 0;

	// Nothing to do for empty text or text that is already UTF-8
	if (text.empty() || (sourceCharset == "utf-8"))
	{
		return text;
	}

	if (sourceCharset.empty())
	{
		// No charset was given: rely on the locale
		if (m_utf8Locale == true)
		{
			// The current locale uses UTF-8
			return text;
		}

		sourceCharset = m_localeCharset;
	}

	dstring converted;
	gsize remaining = static_cast<gsize>(text.length());
	const gsize chunkCapacity = sizeof(chunk);
	bool previousWasInvalid = false;

	try
	{
		IConv converter("UTF-8", sourceCharset);

		while (remaining > 0)
		{
			char *pChunk = chunk;
			gsize chunkLeft = chunkCapacity;

			const size_t result = converter.iconv(&pRemaining, &remaining, &pChunk, &chunkLeft);
			// Grab errno right away, before any other call can overwrite it
			const int errorCode = errno;

			if (result != static_cast<size_t>(-1))
			{
				// This chunk went through cleanly, a new run of bad bytes may start later
				previousWasInvalid = false;
			}
			else if (errorCode == EILSEQ)
			{
				// Conversion was only partially successful
				++m_conversionErrors;
#ifdef DEBUG
				cout << "TextConverter::toUTF8: invalid sequence" << endl;
#endif
				if (m_conversionErrors >= m_maxErrors)
				{
					// Too many errors, give up and hand back the input
					return text;
				}

				converter.reset();

				// Keep whatever was converted ahead of the bad byte
				converted.append(chunk, chunkCapacity - chunkLeft);
				if (previousWasInvalid == false)
				{
					// Only one placeholder per run of invalid bytes
					converted += "?";
					previousWasInvalid = true;
				}

				// Step over the offending byte
				++pRemaining;
				--remaining;
				continue;
			}
			else if (errorCode != E2BIG)
			{
#ifdef DEBUG
				cout << "TextConverter::toUTF8: unknown error " << errorCode << endl;
#endif
				return text;
			}
			// E2BIG only means the chunk buffer is full: flush it and go on

			// Append what was successfully converted
			converted.append(chunk, chunkCapacity - chunkLeft);
		}
#ifdef DEBUG
		cout << "TextConverter::toUTF8: " << m_conversionErrors << " conversion errors" << endl;
#endif
	}
	catch (Error &ce)
	{
#ifdef DEBUG
		cout << "TextConverter::toUTF8: " << ce.what() << endl;
#endif
		converted.clear();

		// Retry once with dashes in place of underscores in the charset name
		if (sourceCharset.find('_') != string::npos)
		{
			string fixedCharset(StringManip::replaceSubString(sourceCharset, "_", "-"));

#ifdef DEBUG
			cout << "TextConverter::toUTF8: trying with charset " << fixedCharset << endl;
#endif
			sourceCharset = fixedCharset;
			converted = toUTF8(text, fixedCharset);
		}
	}
	catch (...)
	{
#ifdef DEBUG
		cout << "TextConverter::toUTF8: unknown exception" << endl;
#endif
		converted.clear();
	}

	// Tell the caller which charset was used
	charset = sourceCharset;

	return converted;
}