/*
 * Data-driven test: looks up the object named by the "name" datum and
 * asserts that grn_obj_is_tokenizer_proc() matches the "expected" datum
 * (non-NULL string => expected to be a tokenizer proc).
 */
void
test_is_tokenizer_proc(gconstpointer data)
{
  const gchar *object_name = gcut_data_get_string(data, "name");
  grn_obj *target = grn_ctx_get(context, object_name, strlen(object_name));

  if (!gcut_data_get_string(data, "expected")) {
    cut_assert_false(grn_obj_is_tokenizer_proc(context, target));
  } else {
    cut_assert_true(grn_obj_is_tokenizer_proc(context, target));
  }
}
/*
 * Create a temporary hash-key lexicon table (ShortText key) configured
 * with the named tokenizer, optional normalizer, and token filters, for
 * use by the tokenize command.
 *
 * tokenizer_name     : text bulk holding the tokenizer proc name (required).
 * normalizer_name    : text bulk holding the normalizer proc name; an empty
 *                      value means "no normalizer".
 * token_filter_names : text bulk passed through to
 *                      grn_proc_table_set_token_filters().
 *
 * Returns the new lexicon (caller owns it), or NULL with ctx's error set
 * when the tokenizer/normalizer is missing or of the wrong type, or when
 * table creation fails. All acquired object references are released on
 * every error path.
 */
static grn_obj *
create_lexicon_for_tokenize(grn_ctx *ctx,
                            grn_obj *tokenizer_name,
                            grn_obj *normalizer_name,
                            grn_obj *token_filter_names)
{
  grn_obj *lexicon;
  grn_obj *tokenizer;
  grn_obj *normalizer = NULL;

  tokenizer = grn_ctx_get(ctx,
                          GRN_TEXT_VALUE(tokenizer_name),
                          GRN_TEXT_LEN(tokenizer_name));
  if (!tokenizer) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "[tokenize] nonexistent tokenizer: <%.*s>",
                     (int)GRN_TEXT_LEN(tokenizer_name),
                     GRN_TEXT_VALUE(tokenizer_name));
    return NULL;
  }

  if (!grn_obj_is_tokenizer_proc(ctx, tokenizer)) {
    grn_obj inspected;
    GRN_TEXT_INIT(&inspected, 0);
    grn_inspect(ctx, &inspected, tokenizer);
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "[tokenize] not tokenizer: %.*s",
                     (int)GRN_TEXT_LEN(&inspected),
                     GRN_TEXT_VALUE(&inspected));
    GRN_OBJ_FIN(ctx, &inspected);
    grn_obj_unlink(ctx, tokenizer);
    return NULL;
  }

  /* An empty normalizer name means "no normalizer". */
  if (GRN_TEXT_LEN(normalizer_name) > 0) {
    normalizer = grn_ctx_get(ctx,
                             GRN_TEXT_VALUE(normalizer_name),
                             GRN_TEXT_LEN(normalizer_name));
    if (!normalizer) {
      grn_obj_unlink(ctx, tokenizer);
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "[tokenize] nonexistent normalizer: <%.*s>",
                       (int)GRN_TEXT_LEN(normalizer_name),
                       GRN_TEXT_VALUE(normalizer_name));
      return NULL;
    }

    if (!grn_obj_is_normalizer_proc(ctx, normalizer)) {
      grn_obj inspected;
      grn_obj_unlink(ctx, tokenizer);
      GRN_TEXT_INIT(&inspected, 0);
      grn_inspect(ctx, &inspected, normalizer);
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "[tokenize] not normalizer: %.*s",
                       (int)GRN_TEXT_LEN(&inspected),
                       GRN_TEXT_VALUE(&inspected));
      GRN_OBJ_FIN(ctx, &inspected);
      grn_obj_unlink(ctx, normalizer);
      return NULL;
    }
  }

  lexicon = grn_table_create(ctx, NULL, 0, NULL,
                             GRN_OBJ_TABLE_HASH_KEY,
                             grn_ctx_at(ctx, GRN_DB_SHORT_TEXT),
                             NULL);
  /* Fix: the original never checked this result; a failed creation
   * leaked the tokenizer/normalizer references and passed NULL on to
   * grn_obj_set_info(). grn_table_create() has already set ctx's error. */
  if (!lexicon) {
    grn_obj_unlink(ctx, tokenizer);
    if (normalizer) {
      grn_obj_unlink(ctx, normalizer);
    }
    return NULL;
  }

  grn_obj_set_info(ctx, lexicon, GRN_INFO_DEFAULT_TOKENIZER, tokenizer);
  grn_obj_unlink(ctx, tokenizer);
  if (normalizer) {
    grn_obj_set_info(ctx, lexicon, GRN_INFO_NORMALIZER, normalizer);
    grn_obj_unlink(ctx, normalizer);
  }
  grn_proc_table_set_token_filters(ctx, lexicon, token_filter_names);

  return lexicon;
}