static bool fts_tokenizer_generic_simple_current_token(struct generic_fts_tokenizer *tok, const char **token_r) { const unsigned char *data = tok->token->data; size_t len = tok->token->used; if (tok->untruncated_length <= tok->max_length) { /* Remove the trailing apostrophe - it was made into U+0027 earlier. There can be only a single such apostrophe, because otherwise the token would have already been split. We also want to remove the trailing apostrophe only if it's the the last character in the nontruncated token - a truncated token may end with apostrophe. */ if (len > 0 && data[len-1] == '\'') { len--; i_assert(len > 0 && data[len-1] != '\''); } } else { fts_tokenizer_delete_trailing_partial_char(data, &len); } i_assert(len <= tok->max_length); *token_r = len == 0 ? "" : t_strndup(tok->token->data, len); buffer_set_used_size(tok->token, 0); tok->untruncated_length = 0; tok->prev_letter = LETTER_TYPE_NONE; return len > 0; }
/* Shorten token so that it is at most max_length bytes long.

   Tokens whose length is already within max_length are left untouched.
   Otherwise the cut point starts at max_length and is passed through
   fts_tokenizer_delete_trailing_partial_char(), which may lower it
   (presumably so the cut does not land inside a multi-byte character -
   confirm against the helper), and the token is truncated there. */
void fts_filter_truncate_token(string_t *token, size_t max_length)
{
	size_t len = max_length;

	if (str_len(token) > max_length) {
		fts_tokenizer_delete_trailing_partial_char(token->data, &len);
		str_truncate(token, len);
		/* The helper must never move the cut point past max_length. */
		i_assert(len <= max_length);
	}
}