/* Return the character that ends at (or covers) byte offset *end_pos of str.

   *end_pos is moved backwards one byte at a time until it indexes a byte
   accepted by UTF8_IS_START_SEQ() (presumably the first byte of a UTF-8
   sequence - see unichar.h). The character starting there is then decoded
   with uni_utf8_get_char() and returned. On return *end_pos holds the offset
   of that starting byte.

   Reaching offset 0 without finding a start byte trips an assert, and a
   failed decode is treated as unreachable, so the caller is expected to
   pass well-formed input. */
static unichar_t
get_ending_utf8_char(const char *str, size_t *end_pos)
{
	unichar_t chr;

	for (;;) {
		if (UTF8_IS_START_SEQ(str[*end_pos]))
			break;
		/* still inside a sequence - there must be a byte before us */
		i_assert(*end_pos > 0);
		(*end_pos)--;
	}

	if (uni_utf8_get_char(&str[*end_pos], &chr) <= 0)
		i_unreached();
	return chr;
}
/* Drop an incomplete trailing character from a truncated token.

   data points to the token bytes and *len is the token length in bytes.
   The function scans backwards from the last byte for a byte accepted by
   UTF8_IS_START_SEQ() (stopping at offset 0 at the latest) and compares the
   byte count reported by uni_utf8_char_bytes() for that byte with the number
   of bytes actually present from there to the end of the token. If they
   differ, *len is reduced so the token ends just before that byte. data
   itself is never modified.

   An empty token (*len == 0) is left untouched. */
void
fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
					   size_t *len)
{
	size_t pos;
	unsigned int char_bytes;

	/* nothing to trim in an empty token; without this check *len-1
	   would wrap around and data[] would be read out of bounds */
	if (*len == 0)
		return;

	/* the token is truncated - make sure the last character
	   exists entirely in the token */
	for (pos = *len-1; pos > 0; pos--) {
		if (UTF8_IS_START_SEQ(data[pos]))
			break;
	}
	char_bytes = uni_utf8_char_bytes(data[pos]);
	if (char_bytes != *len-pos) {
		/* the only expected mismatch is a character that needs more
		   bytes than the token holds - cut it off entirely */
		i_assert(char_bytes > *len-pos);
		*len = pos;
	}
}