charsetConverter::charsetConverter(const charset& source, const charset& dest)
	: m_desc(NULL), m_source(source), m_dest(dest)
{
	// Get an iconv descriptor
	const iconv_t cd = iconv_open(dest.getName().c_str(), source.getName().c_str());

	if (cd != reinterpret_cast <iconv_t>(-1))
	{
		iconv_t* p = new iconv_t;
		*p= cd;

		m_desc = p;
	}
}
示例#2
0
charsetFilteredOutputStream_icu::charsetFilteredOutputStream_icu
	(const charset& source, const charset& dest, outputStream* os,
	 const charsetConverterOptions& opts)
	: m_from(NULL), m_to(NULL), m_sourceCharset(source),
	  m_destCharset(dest), m_stream(*os), m_options(opts)
{
	UErrorCode err = U_ZERO_ERROR;
	m_from = ucnv_open(source.getName().c_str(), &err);

	if (!U_SUCCESS(err))
	{
		throw exceptions::charset_conv_error
			("Cannot initialize ICU converter for source charset '" + source.getName() + "' (error code: " + u_errorName(err) + ".");
	}

	m_to = ucnv_open(dest.getName().c_str(), &err);

	if (!U_SUCCESS(err))
	{
		throw exceptions::charset_conv_error
			("Cannot initialize ICU converter for destination charset '" + dest.getName() + "' (error code: " + u_errorName(err) + ".");
	}

	// Tell ICU what to do when encountering an illegal byte sequence
	if (m_options.silentlyReplaceInvalidSequences)
	{
		// Set replacement chars for when converting from Unicode to codepage
		icu::UnicodeString substString(m_options.invalidSequence.c_str());
		ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting substitution string.");
	}
	else
	{
		// Tell ICU top stop (and return an error) on illegal byte sequences
		ucnv_setToUCallBack
			(m_to, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback.");

		ucnv_setFromUCallBack
			(m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback.");
	}
}
示例#3
0
// static
wordEncoder::Encoding wordEncoder::guessBestEncoding
	(const string& buffer, const charset& charset)
{
	// Charset-specific encoding
	encoding recEncoding;

	if (charset.getRecommendedEncoding(recEncoding))
	{
		if (recEncoding == encoding(encodingTypes::QUOTED_PRINTABLE))
			return ENCODING_QP;
		else
			return ENCODING_B64;
	}

	// Use Base64 if more than 40% non-ASCII, or Quoted-Printable else (default)
	const string::size_type asciiCount =
		utility::stringUtils::countASCIIchars(buffer.begin(), buffer.end());

	const string::size_type asciiPercent =
		(buffer.length() == 0 ? 100 : (100 * asciiCount) / buffer.length());

	if (asciiPercent < 60)
		return ENCODING_B64;
	else
		return ENCODING_QP;
}
示例#4
0
// static
bool wordEncoder::isEncodingNeeded
	(const generationContext& ctx, const string& buffer,
	 const charset& charset, const string& lang)
{
	if (!ctx.getInternationalizedEmailSupport())
	{
		// Charset-specific encoding
		encoding recEncoding;

		if (charset.getRecommendedEncoding(recEncoding))
			return true;

		// No encoding is needed if the buffer only contains ASCII chars
		if (utility::stringUtils::findFirstNonASCIIchar(buffer.begin(), buffer.end()) != string::npos)
			return true;
	}

	// Force encoding when there are only ASCII chars, but there is
	// also at least one of '\n' or '\r' (header fields)
	if (buffer.find_first_of("\n\r") != string::npos)
		return true;

	// If any RFC-2047 sequence is found in the buffer, encode it
	if (buffer.find("=?") != string::npos || buffer.find("?=") != string::npos)
		return true;

	// If a language is specified, force encoding
	if (!lang.empty())
		return true;

	return false;
}
charsetFilteredOutputStream::charsetFilteredOutputStream
	(const charset& source, const charset& dest, outputStream& os)
	: m_desc(NULL), m_sourceCharset(source), m_destCharset(dest),
	  m_stream(os), m_unconvCount(0)
{
	// Get an iconv descriptor
	const iconv_t cd = iconv_open(dest.getName().c_str(), source.getName().c_str());

	if (cd != reinterpret_cast <iconv_t>(-1))
	{
		iconv_t* p = new iconv_t;
		*p= cd;

		m_desc = p;
	}
}
示例#6
0
charsetConverter_icu::charsetConverter_icu
	(const charset& source, const charset& dest, const charsetConverterOptions& opts)
	: m_from(NULL), m_to(NULL), m_source(source), m_dest(dest), m_options(opts)
{
	UErrorCode err = U_ZERO_ERROR;
	m_from = ucnv_open(source.getName().c_str(), &err);

	if (!U_SUCCESS(err))
	{
		throw exceptions::charset_conv_error
			("Cannot initialize ICU converter for source charset '" + source.getName() + "' (error code: " + u_errorName(err) + ".");
	}

	m_to = ucnv_open(dest.getName().c_str(), &err);

	if (!U_SUCCESS(err))
	{
		throw exceptions::charset_conv_error
			("Cannot initialize ICU converter for destination charset '" + dest.getName() + "' (error code: " + u_errorName(err) + ".");
	}
}
示例#7
0
const encoding encoding::decide(ref <const contentHandler> data,
                                const charset& chset, const EncodingUsage usage)
{
    if (usage == USAGE_TEXT)
    {
        encoding recEncoding;

        if (chset.getRecommendedEncoding(recEncoding))
        {
            recEncoding.setUsage(usage);
            return recEncoding;
        }
    }

    return decide(data, usage);
}
示例#8
0
// static
bool wordEncoder::isEncodingNeeded(const string& buffer, const charset& charset)
{
	// Charset-specific encoding
	encoding recEncoding;

	if (charset.getRecommendedEncoding(recEncoding))
		return true;

	// No encoding is needed if the buffer only contains ASCII chars
	if (utility::stringUtils::findFirstNonASCIIchar(buffer.begin(), buffer.end()) != string::npos)
		return true;

	// Force encoding when there are only ASCII chars, but there is
	// also at least one of '\n' or '\r' (header fields)
	if (buffer.find_first_of("\n\r") != string::npos)
		return true;

	return false;
}
示例#9
0
文件: encoding.cpp 项目: kisli/vmime
const encoding encoding::decide(
	const shared_ptr <const contentHandler>& data,
	const charset& chset,
	const EncodingUsage usage
) {

	// Do not re-encode data if it is already encoded
	if (data->isEncoded() && !data->getEncoding().shouldReencode()) {
		return data->getEncoding();
	}

	if (usage == USAGE_TEXT) {

		encoding recEncoding;

		if (chset.getRecommendedEncoding(recEncoding)) {

			recEncoding.setUsage(usage);
			return recEncoding;
		}
	}

	return decide(data, usage);
}
示例#10
0
文件: text.cpp 项目: FEI17N/vmime
void text::createFromString(const string& in, const charset& ch)
{
	size_t asciiCount = 0;
	size_t asciiPercent = 0;

	removeAllWords();

	// Check whether there is a recommended encoding for this charset.
	// If so, the whole buffer will be encoded. Else, the number of
	// 7-bit (ASCII) bytes in the input will be used to determine if
	// we need to encode the whole buffer.
	encoding recommendedEnc;
	const bool alwaysEncode = ch.getRecommendedEncoding(recommendedEnc);

	if (!alwaysEncode)
	{
		asciiCount = utility::stringUtils::countASCIIchars(in.begin(), in.end());
		asciiPercent = (in.length() == 0 ? 100 : (100 * asciiCount) / in.length());
	}

	// If there are "too much" non-ASCII chars, encode everything
	if (alwaysEncode || asciiPercent < 60)  // less than 60% ASCII chars
	{
		appendWord(make_shared <word>(in, ch));
	}
	// Else, only encode words which need it
	else
	{
		bool is8bit = false;     // is the current word 8-bit?
		bool prevIs8bit = false; // is previous word 8-bit?
		unsigned int count = 0;  // total number of words

		for (size_t end = in.size(), pos = 0, start = 0 ; ; )
		{
			if (pos == end || parserHelpers::isSpace(in[pos]))
			{
				const string chunk(in.begin() + start, in.begin() + pos);

				if (pos != end)
					++pos;

				if (is8bit)
				{
					if (count && prevIs8bit)
					{
						// No need to create a new encoded word, just append
						// the current word to the previous one.
						shared_ptr <word> w = getWordAt(getWordCount() - 1);
						w->getBuffer() += " " + chunk;
					}
					else
					{
						if (count)
						{
							shared_ptr <word> w = getWordAt(getWordCount() - 1);
							w->getBuffer() += ' ';
						}

						appendWord(make_shared <word>(chunk, ch));

						prevIs8bit = true;
						++count;
					}
				}
				else
				{
					if (count && !prevIs8bit)
					{
						shared_ptr <word> w = getWordAt(getWordCount() - 1);
						w->getBuffer() += " " + chunk;
					}
					else
					{
						appendWord(make_shared <word>
							(chunk, charset(charsets::US_ASCII)));

						prevIs8bit = false;
						++count;
					}
				}

				if (pos == end)
					break;

				is8bit = false;
				start = pos;
			}
			else if (!parserHelpers::isAscii(in[pos]))
			{
				is8bit = true;
				++pos;
			}
			else
			{
				++pos;
			}
		}
	}
}