예제 #1
0
// Returns text converted to UTF-8.
//
// text: the text to convert.
// charset: name of the charset text is in. It is only read here; the
//   lowercased copy made with StringManip::toLowerCase() is what gets
//   compared and handed over to convert().
//
// The conversion error counter is reset on every call. The input is handed
// back untouched when it is empty, when the charset is "utf-8", or when no
// charset is given and m_utf8Locale is set. With no charset and a non-UTF-8
// locale, m_localeCharset is used as the source charset.
dstring TextConverter::toUTF8(const dstring &text,
	string &charset)
{
	string sourceCharset(StringManip::toLowerCase(charset));

	// Start counting errors afresh
	m_conversionErrors = 0;

	// Nothing to convert
	if (text.empty())
	{
		return text;
	}

	// Already in the target encoding
	if (sourceCharset == "utf-8")
	{
		return text;
	}

	if (sourceCharset.empty())
	{
		// No charset given: fall back on the locale
		if (m_utf8Locale == true)
		{
			// ...which is UTF-8 already
			return text;
		}

		sourceCharset = m_localeCharset;
	}

	return convert(text, sourceCharset, "UTF-8");
}
예제 #2
0
// Converts text from the given charset to UTF-8.
//
// text: the text to convert.
// charset: name of the charset text is in; it is lowercased before use. When
//   empty, text is returned as is if m_utf8Locale is set, otherwise
//   m_localeCharset is used. Unless the function returns early, charset is
//   overwritten on exit with the charset name that was finally tried.
//
// Returns the converted text. The input is returned unchanged when it is
// empty or already UTF-8, when m_maxErrors invalid sequences have been hit,
// or when the conversion fails with anything other than EILSEQ or E2BIG.
// An empty string is returned when an exception is caught and no retry with
// a corrected charset name is possible.
//
// m_conversionErrors is reset on entry and incremented for every invalid
// sequence reported by the converter. Each run of bytes that cannot be
// converted is replaced by a single "?" in the output.
dstring TextConverter::toUTF8(const dstring &text, string &charset)
{
	string textCharset(StringManip::toLowerCase(charset));
	char outputBuffer[8192];
	// The converter is handed a char **, hence the cast
	char *pInput = const_cast<char *>(text.c_str());

	m_conversionErrors = 0;

	if ((text.empty() == true) ||
		(textCharset == "utf-8"))
	{
		// No conversion necessary
		return text;
	}

	if (textCharset.empty() == true)
	{
		if (m_utf8Locale == true)
		{
			// The current locale uses UTF-8
			return text;
		}

		textCharset = m_localeCharset;
	}

	dstring outputText;
	gsize inputSize = static_cast<gsize>(text.length());
	// True while the bytes being skipped belong to a run already marked with "?"
	bool invalidSequence = false;

	try
	{
		IConv converter("UTF-8", textCharset);

		while (inputSize > 0)
		{
			const char *pChunkStart = pInput;
			char *pOutput = outputBuffer;
			gsize outputSize = sizeof(outputBuffer);

			size_t conversions = converter.iconv(&pInput, &inputSize, &pOutput, &outputSize);
			// Save errno before anything else gets a chance to overwrite it
			int errorCode = errno;

			if (pInput != pChunkStart)
			{
				// Some input was consumed before the call stopped, so an error
				// reported now starts a new invalid run. Resetting only on a
				// successful return is not enough: both EILSEQ and E2BIG make
				// the call return -1, even after converting valid text.
				invalidSequence = false;
			}

			if (conversions == static_cast<size_t>(-1))
			{
				if (errorCode == EILSEQ)
				{
					// Conversion was only partially successful
					++m_conversionErrors;
#ifdef DEBUG
					cout << "TextConverter::toUTF8: invalid sequence" << endl;
#endif
					if (m_conversionErrors >= m_maxErrors)
					{
						// Give up
						return text;
					}
					converter.reset();

					// Keep what was converted ahead of the offending byte
					outputText.append(outputBuffer, sizeof(outputBuffer) - outputSize);
					if (invalidSequence == false)
					{
						// Mark the run once, however many bytes it spans
						outputText += "?";
						invalidSequence = true;
					}

					// Skip that
					++pInput;
					--inputSize;
					continue;
				}
				else if (errorCode != E2BIG)
				{
#ifdef DEBUG
					cout << "TextConverter::toUTF8: unknown error " << errorCode << endl;
#endif
					return text;
				}
				// E2BIG: the output buffer is full, flush it and carry on
			}

			// Append what was successfully converted
			outputText.append(outputBuffer, sizeof(outputBuffer) - outputSize);
		}

#ifdef DEBUG
		cout << "TextConverter::toUTF8: " << m_conversionErrors << " conversion errors" << endl;
#endif
	}
	catch (Error &ce)
	{
		// NOTE(review): presumably raised when the converter can't be set up
		// for this charset - confirm against the IConv documentation
#ifdef DEBUG
		cout << "TextConverter::toUTF8: " << ce.what() << endl;
#endif
		outputText.clear();

		string::size_type pos = textCharset.find('_');
		if (pos != string::npos)
		{
			// Try again with dashes in place of underscores in the charset name
			string fixedCharset(StringManip::replaceSubString(textCharset, "_", "-"));

#ifdef DEBUG
			cout << "TextConverter::toUTF8: trying with charset " << fixedCharset << endl;
#endif
			textCharset = fixedCharset;
			outputText = toUTF8(text, fixedCharset);
		}
	}
	catch (...)
	{
#ifdef DEBUG
		cout << "TextConverter::toUTF8: unknown exception" << endl;
#endif
		outputText.clear();
	}

	// Let the caller know what charset was used
	charset = textCharset;
	return outputText;
}
예제 #3
0
// Converts text from one charset to another.
//
// text: the text to convert.
// fromCharset: name of the charset text is in. If an Error is caught and the
//   name contains underscores, it is rewritten with dashes instead and the
//   conversion is attempted again, so the caller may see it modified.
// toCharset: name of the charset to convert to.
//
// Returns the converted text. The input is returned unchanged when
// m_maxErrors invalid sequences have been hit, or when the conversion fails
// with anything other than EILSEQ or E2BIG. An empty string is returned when
// text is empty, or when an exception is caught and no retry with a corrected
// charset name is possible.
//
// m_conversionErrors is incremented for every invalid sequence reported by
// the converter; it is not reset here. Each run of bytes that cannot be
// converted is replaced by a single "?" in the output.
dstring TextConverter::convert(const dstring &text,
	string &fromCharset, const string &toCharset)
{
	dstring outputText;
	char outputBuffer[8192];
	// The converter is handed a char **, hence the cast
	char *pInput = const_cast<char *>(text.c_str());
	gsize inputSize = static_cast<gsize>(text.length());
	// True while the bytes being skipped belong to a run already marked with "?"
	bool invalidSequence = false;

	try
	{
		IConv converter(toCharset, fromCharset);

		while (inputSize > 0)
		{
			const char *pChunkStart = pInput;
			char *pOutput = outputBuffer;
			gsize outputSize = sizeof(outputBuffer);

			size_t conversions = converter.iconv(&pInput, &inputSize, &pOutput, &outputSize);
			// Save errno before anything else gets a chance to overwrite it
			int errorCode = errno;

			if (pInput != pChunkStart)
			{
				// Some input was consumed before the call stopped, so an error
				// reported now starts a new invalid run. Resetting only on a
				// successful return is not enough: both EILSEQ and E2BIG make
				// the call return -1, even after converting valid text.
				invalidSequence = false;
			}

			if (conversions == static_cast<size_t>(-1))
			{
				if (errorCode == EILSEQ)
				{
					// Conversion was only partially successful
					++m_conversionErrors;
#ifdef DEBUG
					clog << "TextConverter::convert: invalid sequence" << endl;
#endif
					if (m_conversionErrors >= m_maxErrors)
					{
						// Give up
						return text;
					}
					converter.reset();

					// Keep what was converted ahead of the offending byte
					outputText.append(outputBuffer, sizeof(outputBuffer) - outputSize);
					if (invalidSequence == false)
					{
						// Mark the run once, however many bytes it spans
						outputText += "?";
						invalidSequence = true;
					}

					// Skip that
					++pInput;
					--inputSize;
					continue;
				}
				else if (errorCode != E2BIG)
				{
#ifdef DEBUG
					clog << "TextConverter::convert: unknown error " << errorCode << endl;
#endif
					return text;
				}
				// E2BIG: the output buffer is full, flush it and carry on
			}

			// Append what was successfully converted
			outputText.append(outputBuffer, sizeof(outputBuffer) - outputSize);
		}

#ifdef DEBUG
		clog << "TextConverter::convert: " << m_conversionErrors << " conversion errors" << endl;
#endif
	}
	catch (Error &ce)
	{
		// NOTE(review): presumably raised when the converter can't be set up
		// for this pair of charsets - confirm against the IConv documentation
#ifdef DEBUG
		clog << "TextConverter::convert: " << ce.what() << endl;
#endif
		outputText.clear();

		string::size_type pos = fromCharset.find('_');
		if (pos != string::npos)
		{
			// Try again with dashes in place of underscores in the charset name
			string fixedCharset(StringManip::replaceSubString(fromCharset, "_", "-"));

#ifdef DEBUG
			clog << "TextConverter::convert: trying with charset " << fixedCharset << endl;
#endif
			fromCharset = fixedCharset;
			outputText = convert(text, fromCharset, toCharset);
		}
	}
	catch (...)
	{
#ifdef DEBUG
		clog << "TextConverter::convert: unknown exception" << endl;
#endif
		outputText.clear();
	}

	return outputText;
}