/// Compares two strings element by element using the ICompare predicate.
///
/// @param lhs  First string.
/// @param rhs  Second string.
/// @return true when both strings have the same length and ICompare accepts
///         every pair of characters at matching positions; false otherwise.
///
/// NOTE(review): ICompare is defined outside this block; presumably it is a
/// case-insensitive character comparison - confirm against its definition.
bool StringICompare(const std::basic_string<CharT> &lhs, const std::basic_string<CharT> &rhs)
{
    // Reject on length first: the three-iterator std::equal walks rhs for
    // lhs.length() elements, so rhs must be at least that long.
    if (lhs.length() != rhs.length()) {
        return false;
    }

    return std::equal(lhs.cbegin(), lhs.cend(), rhs.cbegin(), ICompare);
}
basic_str_wrap( std::basic_string<Char_T> const& str ) :fst( &*str.cbegin() ) ,lst( &*str.cend() ) ,cstr( str.c_str() ) {}
/**
 * Checks whether the bytes of _utf8string form a well-formed UTF-8 sequence.
 *
 * The string is scanned one codepoint at a time. For every lead byte the
 * function verifies that
 *   - the sequence is not truncated by the end of the string,
 *   - every trailing byte has the continuation form 0b10xxxxxx,
 *   - the encoding is not overlong (lead bytes 0xC0/0xC1, 0xE0 followed by
 *     a byte below 0xA0, 0xF0 followed by a byte below 0x90),
 *   - the value is not a UTF-16 surrogate (0xED followed by a byte above 0x9F),
 *   - the value does not exceed U+10FFFF (0xF4 followed by a byte above 0x8F,
 *     or any lead byte above 0xF4).
 *
 * @return true if the whole string is valid UTF-8 (an empty string is valid),
 *         false as soon as an invalid sequence is found.
 *
 * NOTE(review): toUstring() is defined elsewhere; its result is stored in a
 * std::basic_string<unsigned char> here. If that copy allocates and fails,
 * the exception cannot leave this noexcept function - confirm this is
 * acceptable.
 */
bool UTF8string::utf8_is_valid_() const noexcept
{
    const std::basic_string<unsigned char> U8STRING = toUstring( _utf8string );
    auto it = U8STRING.cbegin();
    const auto ITEND = U8STRING.cend();

    // A continuation byte always has the bit pattern 0b10xxxxxx.
    const auto is_continuation = []( unsigned char byte ) -> bool
    {
        return ( 0xC0 & byte ) == 0x80;
    };

    while ( it < ITEND )
    {
        const unsigned char lead = *it;
        const auto remaining = ITEND - it;  // bytes left, lead byte included

        if ( ( 0xF8 & lead ) == 0xF0 && lead <= 0xF4 )
        {
            // 0b11110xxx -> 4-byte codepoint (0xF5..0xF7 are out of range
            // and fall through to the final "invalid" branch).
            if ( remaining < 4 )
                return false;

            if ( !is_continuation( it[1] ) || !is_continuation( it[2] )
                 || !is_continuation( it[3] ) )
                return false;

            // 0xF0: first continuation byte must be in 0x90..0xBF (no overlong form).
            if ( lead == 0xF0 && it[1] < 0x90 )
                return false;

            // 0xF4: first continuation byte must be in 0x80..0x8F (max U+10FFFF).
            if ( lead == 0xF4 && it[1] > 0x8F )
                return false;

            it += 4;
        }
        else if ( ( 0xF0 & lead ) == 0xE0 )
        {
            // 0b1110xxxx -> 3-byte codepoint
            if ( remaining < 3 )
                return false;

            if ( !is_continuation( it[1] ) || !is_continuation( it[2] ) )
                return false;

            // 0xE0: first continuation byte must be in 0xA0..0xBF (no overlong form).
            if ( lead == 0xE0 && it[1] < 0xA0 )
                return false;

            // 0xED: first continuation byte must be in 0x80..0x9F
            // (0xA0..0xBF would encode a UTF-16 surrogate).
            if ( lead == 0xED && it[1] > 0x9F )
                return false;

            it += 3;
        }
        else if ( ( 0xE0 & lead ) == 0xC0 )
        {
            // 0b110xxxxx -> 2-byte codepoint.
            // 0xC0 and 0xC1 can only produce overlong encodings of
            // U+0000..U+007F, so they never appear in valid UTF-8.
            if ( lead < 0xC2 )
                return false;

            if ( remaining < 2 )
                return false;

            if ( !is_continuation( it[1] ) )
                return false;

            it += 2;
        }
        else if ( ( 0x80 & lead ) == 0x00 )
        {
            // 0b0xxxxxxx -> 1-byte codepoint
            ++it;
        }
        else
        {
            // Stray continuation byte or a lead byte above 0xF4.
            return false;
        }
    }

    return true;
}