static unichar_t get_ending_utf8_char(const char *str, size_t *end_pos)
{
	unichar_t c;

	while (!UTF8_IS_START_SEQ(str[*end_pos])) {
		i_assert(*end_pos > 0);
		*end_pos -= 1;
	}
	if (uni_utf8_get_char(str + *end_pos, &c) <= 0)
		i_unreached();
	return c;
}
示例#2
0
void
fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
					   size_t *len)
{
	size_t pos;
	unsigned int char_bytes;

	/* the token is truncated - make sure the last character
	   exists entirely in the token */
	for (pos = *len-1; pos > 0; pos--) {
		if (UTF8_IS_START_SEQ(data[pos]))
			break;
	}
	char_bytes = uni_utf8_char_bytes(data[pos]);
	if (char_bytes != *len-pos) {
		i_assert(char_bytes > *len-pos);
		*len = pos;
	}
}