/* Finish the token currently collected in tok->token and hand it out.

   *token_r is set to a t_strndup() copy of the (possibly shortened) token,
   or to "" when nothing is left. Afterwards the tokenizer's per-token state
   is reset: the token buffer is emptied, untruncated_length is zeroed and
   prev_letter goes back to LETTER_TYPE_NONE.

   Returns TRUE if the resulting token is non-empty. */
static bool
fts_tokenizer_generic_simple_current_token(struct generic_fts_tokenizer *tok,
                                           const char **token_r)
{
	const unsigned char *data = tok->token->data;
	size_t len = tok->token->used;

	if (tok->untruncated_length > tok->max_length) {
		/* The token was cut at max_length. A truncated token may
		   legitimately end with an apostrophe, so leave that alone
		   and only let the helper adjust the length (presumably
		   dropping an incomplete trailing multi-byte character). */
		fts_tokenizer_delete_trailing_partial_char(data, &len);
	} else if (len > 0 && data[len-1] == '\'') {
		/* Untruncated token ending in an apostrophe (already
		   normalized to U+0027 earlier): strip it. There can be
		   only one, since otherwise the token would have been
		   split before reaching this point. */
		len--;
		i_assert(len > 0 && data[len-1] != '\'');
	}
	i_assert(len <= tok->max_length);

	/* Copy the token out before the buffer is reset below. */
	if (len == 0)
		*token_r = "";
	else
		*token_r = t_strndup(tok->token->data, len);

	buffer_set_used_size(tok->token, 0);
	tok->untruncated_length = 0;
	tok->prev_letter = LETTER_TYPE_NONE;
	return len > 0;
}
/* Example #2 (0) */
/* Shorten token so that it is at most max_length bytes long.

   Tokens that already fit are left untouched. Otherwise the cut point
   starts at max_length and is passed through
   fts_tokenizer_delete_trailing_partial_char(), which may move it
   (presumably backwards, so that the token does not end in the middle of
   a multi-byte character), before the string is truncated there. */
void fts_filter_truncate_token(string_t *token, size_t max_length)
{
	if (str_len(token) > max_length) {
		size_t len = max_length;

		fts_tokenizer_delete_trailing_partial_char(token->data, &len);
		str_truncate(token, len);
		/* The helper must never have moved the cut point forward. */
		i_assert(len <= max_length);
	}
}