/*
 * Reports whether a NUL-terminated string is made up entirely of sequences
 * accepted by is_legal_utf8().
 *
 * source: NUL-terminated byte string to check; must not be NULL.
 *
 * Returns true if every sequence up to the terminator is accepted (an empty
 * string is therefore accepted), false as soon as one sequence is rejected
 * or is cut short by the terminator.
 */
bool is_utf8(const char *source) {
    const unsigned char *cur = (const unsigned char *)source;

    while (*cur) {
        /* Sequence length: the table's trailing-byte count for this lead
         * byte, plus one for the lead byte itself. */
        unsigned short next_n = g_trailing_bytes_for_utf8[*cur] + 1;

        /* is_legal_utf8() is handed next_n bytes starting at cur. Make sure
         * the terminator does not fall inside that range first; otherwise
         * the helper would be asked to look at memory beyond the end of the
         * string. The scan stops at the first NUL, so it never reads past
         * the terminator itself. */
        for (unsigned short i = 1; i < next_n; ++i) {
            if (cur[i] == 0) {
                return false;
            }
        }

        if (!is_legal_utf8(cur, next_n)) {
            return false;
        }
        cur += next_n;
    }
    return true;
}
/*
 * Reports whether the first `length` bytes at `source` are made up entirely
 * of sequences accepted by is_legal_utf8().
 *
 * source: start of the byte buffer to check (need not be NUL-terminated).
 * length: number of bytes in the buffer.
 *
 * Returns true if the whole buffer is consumed by accepted sequences (a
 * zero-length buffer is therefore accepted). Returns false if a NUL byte is
 * found inside the buffer, if the last sequence would extend past the end
 * of the buffer, or if is_legal_utf8() rejects a sequence.
 */
bool is_utf8(const char *source, size_t length) {
    const unsigned char *cur = (const unsigned char *)source;
    const unsigned char *end = cur + length;

    while (cur < end) {
        /* An embedded NUL is treated as invalid input. */
        if (*cur == 0) {
            return false;
        }

        /* Sequence length: the table's trailing-byte count for this lead
         * byte, plus one for the lead byte itself. */
        unsigned short next_n = g_trailing_bytes_for_utf8[*cur] + 1;

        /* Compare byte counts rather than computing cur + next_n: that
         * pointer could lie more than one past the end of the buffer, which
         * is undefined behaviour to even form. */
        if ((size_t)(end - cur) < next_n) {
            return false;
        }

        if (!is_legal_utf8(cur, next_n)) {
            return false;
        }
        cur += next_n;
    }
    return true;
}
/*
 * Reports whether a NUL-terminated string is made up entirely of sequences
 * accepted by is_legal_utf8().
 *
 * source: NUL-terminated byte string to check; must not be NULL.
 *
 * Returns true if every sequence up to the terminator is accepted (an empty
 * string is therefore accepted); false if the terminator is reached in the
 * middle of a sequence or is_legal_utf8() rejects a sequence.
 */
bool is_utf8(const char *source) {
    while (*source != 0) {
        const unsigned char *seq_start = (const unsigned char *)source;
        unsigned short trailing = g_trailing_bytes_for_utf8[*seq_start];

        /* Step over the lead byte and each announced trailing byte, bailing
         * out if the terminator shows up before the sequence is complete.
         * NOTE(review): assumes the table only holds counts in 0..5 --
         * confirm against the table definition. */
        unsigned short left = trailing + 1;
        while (left > 0) {
            if (*source++ == 0) {
                return false;
            }
            --left;
        }

        /* All bytes of the sequence are present; check the sequence itself. */
        if (!is_legal_utf8(seq_start, trailing + 1)) {
            return false;
        }
    }
    return true;
}