/// Walks backwards from `e` towards `b` and returns an iterator to the last
/// element whose value does not occur in `to_utf32(target)`.
///
/// @param b       Start of the range to examine.
/// @param e       End of the range to examine (one past the last element).
/// @param target  Set of characters to skip; it is converted with `to_utf32`
///                once, before the scan begins.
/// @return Iterator to the last element not found in the converted target,
///         or a copy of `e` when every element is found in it (including
///         when the range is empty).
UtfIterator<C> str_find_last_not_of(const UtfIterator<C>& b, const UtfIterator<C>& e, const basic_string<C>& target) {
    auto excluded = to_utf32(target);
    for (auto cursor = e; cursor != b;) {
        // Step back first: `e` itself is never dereferenced.
        --cursor;
        const bool in_target = excluded.find(*cursor) != npos;
        if (!in_target) {
            return cursor;
        }
    }
    return e;
}
/// Forwards the range [p, pe) to `to_utf32` and returns whatever it produces.
///
/// @param p   Start of the input range.
/// @param pe  End of the input range.
/// @return The iterator / optional `char32_t` pair produced by `to_utf32`.
///         NOTE(review): presumably the iterator is the position after the
///         decoded unit and the optional is empty on failure -- confirm
///         against the `to_utf32(Iterator, Iterator)` overload.
std::pair<Iterator, boost::optional<char32_t>> decode(Iterator p, Iterator pe)
{
    std::pair<Iterator, boost::optional<char32_t>> decoded = to_utf32(p, pe);
    return decoded;
}
/// Reads the entire stream and decodes it to UTF-32, selecting the source
/// encoding from a leading byte-order mark (BOM).
///
/// Recognised marks: UTF-32BE, UTF-32LE, UTF-16BE, UTF-16LE and UTF-8. The
/// mark, if present, is stripped before decoding. UTF-32 input is assembled
/// directly from its bytes; UTF-16 input is assembled into a `std::u16string`
/// and handed to `to_utf32`; UTF-8 input and input without a mark are handed
/// to `to_utf32` as a byte string.
///
/// @param src  Stream to consume; it is read until exhaustion.
/// @return The decoded text.
/// @throws std::logic_error if the payload after a UTF-32 mark is not a
///         multiple of 4 bytes, or after a UTF-16 mark not a multiple of 2.
std::u32string read_with_bom(std::istream & src)
{
    enum encoding {
        encoding_utf32be = 0,
        encoding_utf32le,
        encoding_utf16be,
        encoding_utf16le,
        encoding_utf8,
        encoding_ascii,
    };

    using size_type = std::string::size_type;

    // Indexed by the enum values above. The 4-byte UTF-32 marks come before
    // the 2-byte UTF-16 marks because the UTF-32LE mark begins with the
    // UTF-16LE mark and must be tested first.
    const std::vector<std::string> boms = {
        std::string("\x00\x00\xFE\xFF", 4),
        std::string("\xFF\xFE\x00\x00", 4),
        std::string("\xFE\xFF", 2),
        std::string("\xFF\xFE", 2),
        std::string("\xEF\xBB\xBF", 3)
    };

    std::string buffer((std::istreambuf_iterator<char>(src)), std::istreambuf_iterator<char>());

    encoding enc = encoding_ascii;
    for (size_type i = 0; i < boms.size(); ++i) {
        const std::string & bom = boms[i];
        if (buffer.compare(0, bom.length(), bom) == 0) {
            enc = static_cast<encoding>(i);
            buffer.erase(0, bom.length());
            break;
        }
    }

    // Widen each byte through unsigned char. Where plain char is signed, a
    // byte >= 0x80 would otherwise be promoted to a negative int, and the
    // sign bits would leak into the combined code unit (and left-shifting a
    // negative value is undefined before C++20).
    const auto byte_at = [&buffer](size_type pos) -> char32_t {
        return static_cast<char32_t>(static_cast<unsigned char>(buffer[pos]));
    };

    switch (enc) {
    case encoding_utf32be: {
        if (buffer.length() % 4 != 0) {
            throw std::logic_error("size in bytes must be a multiple of 4");
        }
        const size_type count = buffer.length() / 4;
        std::u32string temp(count, U'\0');
        for (size_type i = 0; i < count; ++i) {
            temp[i] = static_cast<char32_t>(
                byte_at(i * 4 + 3)
                | (byte_at(i * 4 + 2) << 8)
                | (byte_at(i * 4 + 1) << 16)
                | (byte_at(i * 4 + 0) << 24));
        }
        return temp;
    }
    case encoding_utf32le: {
        if (buffer.length() % 4 != 0) {
            throw std::logic_error("size in bytes must be a multiple of 4");
        }
        const size_type count = buffer.length() / 4;
        std::u32string temp(count, U'\0');
        for (size_type i = 0; i < count; ++i) {
            temp[i] = static_cast<char32_t>(
                byte_at(i * 4 + 0)
                | (byte_at(i * 4 + 1) << 8)
                | (byte_at(i * 4 + 2) << 16)
                | (byte_at(i * 4 + 3) << 24));
        }
        return temp;
    }
    case encoding_utf16be: {
        if (buffer.length() % 2 != 0) {
            throw std::logic_error("size in bytes must be a multiple of 2");
        }
        const size_type count = buffer.length() / 2;
        std::u16string temp(count, u'\0');
        for (size_type i = 0; i < count; ++i) {
            temp[i] = static_cast<char16_t>(
                byte_at(i * 2 + 1) | (byte_at(i * 2 + 0) << 8));
        }
        return to_utf32(temp);
    }
    case encoding_utf16le: {
        if (buffer.length() % 2 != 0) {
            throw std::logic_error("size in bytes must be a multiple of 2");
        }
        const size_type count = buffer.length() / 2;
        std::u16string temp(count, u'\0');
        for (size_type i = 0; i < count; ++i) {
            temp[i] = static_cast<char16_t>(
                byte_at(i * 2 + 0) | (byte_at(i * 2 + 1) << 8));
        }
        return to_utf32(temp);
    }
    default:
        // UTF-8 (mark already stripped) or no mark at all.
        return to_utf32(buffer);
    }
}
/// Converts a `std::u16string` to `std::u32string` by chaining two other
/// conversions: `to_utf8(s)` first, then `to_utf32` on that result.
///
/// @param s  Input string.
/// @return The result of `to_utf32(to_utf8(s))`.
std::u32string to_utf32(const std::u16string &s)
{
    // The intermediate is passed straight through as a temporary.
    std::u32string converted = to_utf32(to_utf8(s));
    return converted;
}
/// Scans [b, e) from the front and returns an iterator to the first element
/// whose value occurs in `to_utf32(target)`.
///
/// @param b       Start of the range to examine.
/// @param e       End of the range to examine.
/// @param target  Set of characters to look for; it is converted with
///                `to_utf32` once, before the scan begins.
/// @return Iterator to the first matching element, or a copy of `e` when no
///         element matches.
UtfIterator<C> str_find_first_of(const UtfIterator<C>& b, const UtfIterator<C>& e, const basic_string<C>& target) {
    auto wanted = to_utf32(target);
    for (auto cursor = b; cursor != e; ++cursor) {
        // Convert to char32_t before the lookup.
        const char32_t c = *cursor;
        if (wanted.find(c) != npos) {
            return cursor;
        }
    }
    return e;
}
/// Searches [b, e) for the first occurrence of the sequence produced by
/// `to_utf32(target)`, using `std::search`.
///
/// @param b       Start of the range to search.
/// @param e       End of the range to search.
/// @param target  Sequence to look for; it is converted with `to_utf32`
///                before searching.
/// @return Whatever `std::search` returns for these arguments: an iterator
///         to the start of the first match, or `e` when there is none.
UtfIterator<C> str_search(const UtfIterator<C>& b, const UtfIterator<C>& e, const basic_string<C>& target) {
    auto needle = to_utf32(target);
    auto needle_first = needle.begin();
    auto needle_last = needle.end();
    return std::search(b, e, needle_first, needle_last);
}