// Decodes a single code point from the UTF-16 code units starting at `p`,
// performing no validation of the input.
//
// The caller must guarantee that `p` is dereferenceable and, when `*p` is a
// surrogate, that it is a leading surrogate (checked only by `assert`) and
// that the following code unit can be read as well.
//
// Returns the iterator advanced past the consumed code unit(s) together with
// the decoded code point.
std::pair<Iterator, char32_t> to_utf32_unchecked(Iterator p)
{
    if (is_surrogate(*p)) {
        // Two code units form one code point in 0x010000 .. 0x10FFFF.
        assert(is_leading(*p));

        // Bit layout of (code point - 0x10000):
        //   3210 FEDC BA98 7654 3210
        //   LLLL LLLL LLTT TTTT TTTT
        // L = payload of the leading unit, T = payload of the trailing unit.
        char32_t code_point = 0x10000;

        code_point += (*p - 0xD800) << 10;  // upper ten bits from the lead
        ++p;

        code_point += (*p - 0xDC00);        // lower ten bits from the trail
        ++p;

        return std::make_pair(p, code_point);
    }

    // Any other code unit is the code point itself.
    char32_t code_point = *p;
    ++p;
    return std::make_pair(p, code_point);
}
// Decodes a single code point from the UTF-16 code units in [p, pe),
// validating the sequence as it goes.
//
// On success, returns the iterator advanced past the consumed code unit(s)
// and the decoded code point.
//
// On failure, the second member is `boost::none` and the first member is the
// position at which decoding stopped:
//   - empty range:                      `p` unchanged;
//   - first unit is a surrogate but not a leading one: `p` unchanged;
//   - input ends after the lead:        `p == pe`;
//   - unit after the lead is not a trailing surrogate: `p` at that unit;
//   - combined value rejected by `is_valid_code_point`: `p` past both units.
std::pair<Iterator, boost::optional<char32_t>> to_utf32(Iterator p, Iterator pe)
{
    // An empty range has no code unit to read; dereferencing `p` here would
    // be undefined behaviour, so report it as an error instead.
    if (p == pe)
        return std::make_pair(p, boost::none);

    if (!is_surrogate(*p)) {
        // A simple case: the code unit is the code point itself.
        char32_t code_point = *p;
        ++p;
        return std::make_pair(p, code_point);
    }

    // Otherwise have to process a surrogate pair.
    //
    // 0x010000 .. 0x10FFFF
    // Code point - 0x10000 ==
    //   3210 FEDC BA98 7654 3210
    //   LLLL LLLL LLTT TTTT TTTT

    // A lead surrogate was expected.
    if (!is_leading(*p))
        return std::make_pair(p, boost::none);

    char32_t code_point = 0x10000;
    code_point += (*p - 0xD800) << 10;
    ++p;

    // Sequence has ended prematurely.
    if (p == pe)
        return std::make_pair(p, boost::none);

    // A trail surrogate was expected.
    if (!is_trailing(*p))
        return std::make_pair(p, boost::none);

    code_point += (*p - 0xDC00);
    ++p;

    // Sequence encodes an invalid code point.
    if (!is_valid_code_point(code_point))
        return std::make_pair(p, boost::none);

    return std::make_pair(p, code_point);
}
// Tells whether `c` is acceptable: it must not be a surrogate and its value
// must be strictly below `char_t::max_code_point`.
//
// NOTE(review): the comparison excludes `max_code_point` itself. If that
// constant is the inclusive maximum rather than a one-past-the-end bound,
// the largest code point is rejected here — confirm against its definition.
inline bool is_valid (char_t c)
{
    return !is_surrogate(c) && c.value < char_t::max_code_point;
}