Exemple #1
0
// Compares two strings of the same character type for equality using the
// ICompare predicate (defined elsewhere; presumably a case-insensitive
// character comparison, judging by the name).
//
// Returns true only when both strings have the same length and ICompare
// accepts every pair of characters at matching positions.
bool StringICompare(const std::basic_string<CharT> &lhs, const std::basic_string<CharT> &rhs)
{
    // Guard clause: a length mismatch settles the answer without inspecting
    // any character, and keeps std::equal from reading past the end of rhs.
    if (lhs.length() != rhs.length()) {
        return false;
    }

    return std::equal(lhs.cbegin(), lhs.cend(), rhs.cbegin(), ICompare);
}
Exemple #2
0
   basic_str_wrap( std::basic_string<Char_T> const& str )
       :fst( &*str.cbegin() )
	   ,lst( &*str.cend() )
	   ,cstr( str.c_str() )
    {}
// Checks whether the bytes obtained from toUstring( _utf8string ) form a
// well-formed UTF-8 sequence.
//
// The string is walked one encoded codepoint at a time. For each lead byte
// the function verifies that:
//   - enough bytes remain for the announced sequence length,
//   - every trailing byte is a continuation byte (0b10xxxxxx),
//   - the sequence is not an overlong encoding, does not encode a UTF-16
//     surrogate (U+D800..U+DFFF) and does not exceed U+10FFFF.
//
// Returns true if every sequence is well-formed (an empty string is valid),
// false at the first ill-formed sequence.
bool UTF8string::utf8_is_valid_() const noexcept
{
    const std::basic_string<unsigned char> U8STRING = toUstring( _utf8string );
    auto it = U8STRING.begin();
    const auto ITEND = U8STRING.cend();

    while ( it < ITEND )
    {
        if ( ( 0xF8 & *it ) == 0xF0 && *it <= 0xF4 )
        {
            // The UTF-8 codepoint begins with 0b11110xxx -> 4-byte codepoint
            // (lead bytes 0xF5..0xF7 would encode values above U+10FFFF and
            // fall through to the final "invalid" branch).
            // If the string ends before the 4-byte codepoint does -> invalid
            if ( ( ITEND - it ) < 4 )
                return false;

            // Each of the following bytes is a value
            // between 0x80 and 0xBF
            if ( ( ( 0xC0 & *( it + 1 ) ) != 0x80 ) || ( ( 0xC0 & *( it + 2 ) ) != 0x80 )
                    || ( ( 0xC0 & *( it + 3 ) ) != 0x80 ) )
            {
                return false;
            }

            // If the first byte of the sequence is 0xF0
            // then the first continuation byte must be between 0x90 and 0xBF
            // (anything lower is an overlong encoding);
            // otherwise, if the byte is 0xF4
            // then the first continuation byte must be between 0x80 and 0x8F
            // (anything higher is above U+10FFFF).
            // The other bound of each range is already guaranteed by the
            // continuation-byte check above.
            if ( *it == 0xF0 )
            {
                if ( *( it + 1 ) < 0x90 )
                    return false;
            }
            else if ( *it == 0xF4 )
            {
                if ( *( it + 1 ) > 0x8F )
                    return false;
            }

            it += 4;    // Jump to the next codepoint
        }
        else if ( ( 0xF0 & *it ) == 0xE0 )
        {
            // The UTF-8 codepoint begins with 0b1110xxxx -> 3-byte codepoint
            if ( ( ITEND - it ) < 3 )
                return false;

            // Each of the following bytes starts with
            // 0b10xxxxxx in a valid string
            if ( ( ( 0xC0 & *( it + 1 ) ) != 0x80 ) || ( ( 0xC0 & *( it + 2 ) ) != 0x80 ) )
                return false;

            // If the first byte of the sequence is 0xE0
            // then the first continuation byte must be between 0xA0 and 0xBF
            // (anything lower is an overlong encoding);
            // otherwise, if the byte is 0xED
            // then the first continuation byte must be between 0x80 and 0x9F
            // (anything higher encodes a UTF-16 surrogate).
            if ( *it == 0xE0 )
            {
                if ( *( it + 1 ) < 0xA0 )
                    return false;
            }
            else if ( *it == 0xED )
            {
                if ( *( it + 1 ) > 0x9F )
                    return false;
            }

            it += 3;
        }
        else if ( ( 0xE0 & *it ) == 0xC0 )
        {
            // The UTF-8 codepoint begins with 0b110xxxxx -> 2-byte codepoint

            // 0xC0 and 0xC1 can only start an overlong encoding of
            // U+0000..U+007F, so they never appear in well-formed UTF-8.
            if ( *it < 0xC2 )
                return false;

            if ( ( ITEND - it ) < 2 )
                return false;

            // The following byte starts with 0b10xxxxxx in a valid string
            if ( ( 0xC0 & *( it + 1 ) ) != 0x80 )
                return false;

            it += 2;
        }
        else if ( ( 0x80 & *it ) == 0x00 )
        {
            // The UTF-8 codepoint begins with 0b0xxxxxxx -> 1-byte codepoint
            it += 1;
        }
        else
        {
            // Stray continuation byte (0b10xxxxxx) or a lead byte that is
            // never valid (0xF5..0xFF)
            return false;
        }
    }

    return true;
}