/*
 * Decode the UTF-8 character that begins at @s and store its code point
 * in *@out.
 *
 * Returns the number of bytes the character occupies (1 to 4).
 *
 * The caller must supply valid UTF-8 with every byte of the character
 * readable from @s; this precondition is only checked by an assert().
 */
static int utf8_read_char(const char *s, uchar_t *out)
{
	const unsigned char *bytes = (const unsigned char *)s;
	uchar_t cp;
	int len;
	int i;

	assert(utf8_validate_cz(s));

	/*
	 * The lead byte determines the sequence length and how many of its
	 * low bits belong to the code point.
	 */
	if (bytes[0] <= 0x7F) {
		/* 00..7F: plain ASCII */
		len = 1;
		cp = bytes[0];
	} else if (bytes[0] <= 0xDF) {
		/* C2..DF (unless input is invalid) */
		len = 2;
		cp = (uchar_t)bytes[0] & 0x1F;
	} else if (bytes[0] <= 0xEF) {
		/* E0..EF */
		len = 3;
		cp = (uchar_t)bytes[0] & 0xF;
	} else {
		/* F0..F4 (unless input is invalid) */
		len = 4;
		cp = (uchar_t)bytes[0] & 0x7;
	}

	/* Every continuation byte appends its low six bits. */
	for (i = 1; i < len; i++) {
		cp = (cp << 6) | ((uchar_t)bytes[i] & 0x3F);
	}

	*out = cp;
	return len;
}
/*
 * Check a NUL-terminated string for UTF-8 validity.
 *
 * Walks the string one character at a time, using utf8_validate_cz() to
 * obtain the byte length of the character at the current position; a
 * length of 0 is treated as "invalid here".
 *
 * Returns true if the terminating NUL is reached without any such
 * failure (an empty string is therefore valid), false otherwise.
 */
static bool utf8_validate(const char *s)
{
	while (*s != 0) {
		size_t step = utf8_validate_cz(s);

		if (step == 0) {
			return false;
		}
		s += step;
	}

	return true;
}
/*
 * Check a NUL-terminated string for UTF-8 validity.
 *
 * Advances through the string character by character, asking
 * utf8_validate_cz() for the byte length of the character at the
 * current position; a length of 0 is treated as "invalid here".
 *
 * Returns TRUE once the terminating NUL is reached (so an empty string
 * is valid), or FALSE at the first character that fails the check.
 */
static int utf8_validate(const char *s)
{
	int step;

	for (;;) {
		if (*s == 0) {
			return TRUE;
		}

		step = utf8_validate_cz(s);
		if (step == 0) {
			return FALSE;
		}

		s += step;
	}
}