Пример #1
0
 // Walks [b, e) from the back and returns an iterator to the last element
 // whose value does not occur in `target` (after `target` has been converted
 // with to_utf32). Returns `e` when every element occurs in `target`, which
 // includes the case of an empty range.
 UtfIterator<C> str_find_last_not_of(const UtfIterator<C>& b, const UtfIterator<C>& e,
         const basic_string<C>& target) {
     const auto excluded = to_utf32(target);
     for (auto cursor = e; cursor != b;) {
         --cursor;  // step back before reading: `e` itself is never dereferenced
         const bool listed = excluded.find(*cursor) != npos;
         if (!listed)
             return cursor;
     }
     return e;
 }
Пример #2
0
				// Decodes from the range [p, pe) by forwarding both iterators to
				// to_utf32(p, pe) and returning what that call produces.
				// NOTE(review): the pair presumably carries the iterator just past the
				// consumed input and the decoded code point (empty when decoding
				// fails) — to_utf32 is not visible here, confirm against it.
				std::pair<Iterator, boost::optional<char32_t>> decode(Iterator p, Iterator pe)
				{
					return to_utf32(p, pe);
				}
Пример #3
0
// Reads the whole of `src` and converts it to UTF-32, choosing the source
// encoding from a leading byte-order mark (BOM).
//
// Recognised BOMs: UTF-32BE, UTF-32LE, UTF-16BE, UTF-16LE and UTF-8. A matched
// BOM is stripped before decoding. UTF-8 input and input without any
// recognised BOM are both handed to to_utf32(std::string).
//
// Throws std::logic_error when the payload following a UTF-32 (resp. UTF-16)
// BOM is not a whole number of 4-byte (resp. 2-byte) code units.
std::u32string read_with_bom(std::istream & src)
{

	enum encoding {
		encoding_utf32be = 0,
		encoding_utf32le,
		encoding_utf16be,
		encoding_utf16le,
		encoding_utf8,
		encoding_ascii,
	};

	// Entry i is the BOM of encoding(i). The order matters: the UTF-32LE BOM
	// starts with the UTF-16LE BOM, so it has to be tested first.
	const std::vector<std::string> boms = {
		std::string("\x00\x00\xFE\xFF", 4),
		std::string("\xFF\xFE\x00\x00", 4),
		std::string("\xFE\xFF", 2),
		std::string("\xFF\xFE", 2),
		std::string("\xEF\xBB\xBF", 3)
	};

	std::string buffer((std::istreambuf_iterator<char>(src)), std::istreambuf_iterator<char>());

	// Without a recognised BOM the data goes down the default decoding path.
	encoding enc = encoding_ascii;

	for (std::size_t i = 0; i < boms.size(); ++i) {
		const std::string & bom = boms[i];
		if (buffer.compare(0, bom.length(), bom) == 0) {
			enc = static_cast<encoding>(i);
			buffer.erase(0, bom.length());
			break;
		}
	}

	// Fetch one byte as an unsigned value. Plain `char` may be signed, in which
	// case a byte >= 0x80 would be sign-extended when promoted and its high bits
	// would overwrite the other bytes once the pieces are OR-ed together.
	const auto byte_at = [&buffer](std::size_t index) -> char32_t {
		return static_cast<char32_t>(static_cast<unsigned char>(buffer[index]));
	};

	switch (enc) {
	case encoding_utf32be:
	{
		if (buffer.length() % 4 != 0) {
			throw std::logic_error("size in bytes must be a multiple of 4");
		}
		const std::size_t count = buffer.length() / 4;
		std::u32string temp(count, U'\0');
		for (std::size_t i = 0; i < count; ++i) {
			// Big endian: the first byte is the most significant one.
			temp[i] = static_cast<char32_t>(
				(byte_at(i * 4 + 0) << 24) | (byte_at(i * 4 + 1) << 16) |
				(byte_at(i * 4 + 2) << 8) | byte_at(i * 4 + 3));
		}
		return temp;
	}
	case encoding_utf32le:
	{
		if (buffer.length() % 4 != 0) {
			throw std::logic_error("size in bytes must be a multiple of 4");
		}
		const std::size_t count = buffer.length() / 4;
		std::u32string temp(count, U'\0');
		for (std::size_t i = 0; i < count; ++i) {
			// Little endian: the first byte is the least significant one.
			temp[i] = static_cast<char32_t>(
				byte_at(i * 4 + 0) | (byte_at(i * 4 + 1) << 8) |
				(byte_at(i * 4 + 2) << 16) | (byte_at(i * 4 + 3) << 24));
		}
		return temp;
	}
	case encoding_utf16be:
	{
		if (buffer.length() % 2 != 0) {
			throw std::logic_error("size in bytes must be a multiple of 2");
		}
		const std::size_t count = buffer.length() / 2;
		std::u16string temp(count, u'\0');
		for (std::size_t i = 0; i < count; ++i) {
			temp[i] = static_cast<char16_t>((byte_at(i * 2 + 0) << 8) | byte_at(i * 2 + 1));
		}
		return to_utf32(temp);
	}
	case encoding_utf16le:
	{
		if (buffer.length() % 2 != 0) {
			throw std::logic_error("size in bytes must be a multiple of 2");
		}
		const std::size_t count = buffer.length() / 2;
		std::u16string temp(count, u'\0');
		for (std::size_t i = 0; i < count; ++i) {
			temp[i] = static_cast<char16_t>(byte_at(i * 2 + 0) | (byte_at(i * 2 + 1) << 8));
		}
		return to_utf32(temp);
	}
	default:
		// encoding_utf8 (BOM already removed) and encoding_ascii (no BOM).
		return to_utf32(buffer);
	}
}
Пример #4
0
// Converts `s` to a std::u32string in two steps: `s` is first passed to
// to_utf8, and that intermediate result is then passed to another to_utf32
// overload.
// NOTE(review): presumably `s` holds UTF-16 and the intermediate is a UTF-8
// std::string — both helpers are defined outside this excerpt, confirm there
// (including how they treat malformed input such as unpaired surrogates).
std::u32string to_utf32(const std::u16string &s) {
	return to_utf32(to_utf8(s));
}
Пример #5
0
 // Returns an iterator to the first element of [b, e) whose value occurs in
 // `target` (after `target` has been converted with to_utf32), or `e` when no
 // element qualifies.
 UtfIterator<C> str_find_first_of(const UtfIterator<C>& b, const UtfIterator<C>& e,
         const basic_string<C>& target) {
     const auto wanted = to_utf32(target);
     for (auto cursor = b; cursor != e; ++cursor) {
         const char32_t current = *cursor;
         if (wanted.find(current) != npos)
             return cursor;
     }
     return e;
 }
Пример #6
0
 // Locates the first occurrence of `target` (converted with to_utf32) as a
 // contiguous subsequence of [b, e), delegating the matching to std::search.
 // As with std::search, the result is `e` when there is no match and `b` when
 // the converted target is empty.
 UtfIterator<C> str_search(const UtfIterator<C>& b, const UtfIterator<C>& e,
         const basic_string<C>& target) {
     const auto needle = to_utf32(target);
     const auto needle_first = needle.begin();
     const auto needle_last = needle.end();
     return std::search(b, e, needle_first, needle_last);
 }