/*
 * highlight_html(string[, lexicon])
 *
 * Highlights args[0] with a highlighter that is cached in the expression
 * variable named GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME.  When the variable
 * does not exist yet, the highlighter is built from the expression by
 * func_highlight_html_create_highlighter(), given the optional lexicon
 * (args[1], or NULL when only one argument is passed) and stored in the
 * variable.
 *
 * Returns an object allocated with grn_plugin_proc_alloc(): GRN_DB_TEXT
 * holding the highlighted output, or GRN_DB_VOID after reporting
 * GRN_INVALID_ARGUMENT when nargs is not 1 or 2.
 */
static grn_obj *
func_highlight_html(grn_ctx *ctx, int nargs, grn_obj **args,
                    grn_user_data *user_data)
{
  grn_obj *expression = NULL;
  grn_obj *lexicon = NULL;
  grn_obj *input;
  grn_obj *output;
  grn_obj *cache;
  grn_highlighter *highlighter;
  size_t cache_name_length;

  if (nargs < 1 || nargs > 2) {
    GRN_PLUGIN_ERROR(ctx,
                     GRN_INVALID_ARGUMENT,
                     "highlight_html(): wrong number of arguments (%d for 1..2)",
                     nargs);
    return grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  input = args[0];
  if (nargs == 2) {
    lexicon = args[1];
  }

  grn_proc_get_info(ctx, user_data, NULL, NULL, &expression);

  cache_name_length = strlen(GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME);
  cache = grn_expr_get_var(ctx, expression,
                           GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME,
                           cache_name_length);
  if (!cache) {
    /* Cache miss: create the variable, re-initialise it as an owning
     * pointer bulk and store a freshly built highlighter in it. */
    cache = grn_expr_get_or_add_var(ctx, expression,
                                    GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME,
                                    cache_name_length);
    GRN_OBJ_FIN(ctx, cache);
    GRN_PTR_INIT(cache, GRN_OBJ_OWN, GRN_DB_OBJECT);

    highlighter = func_highlight_html_create_highlighter(ctx, expression);
    grn_highlighter_set_lexicon(ctx, highlighter, lexicon);
    GRN_PTR_SET(ctx, cache, highlighter);
  } else {
    highlighter = (grn_highlighter *)GRN_PTR_VALUE(cache);
  }

  output = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_TEXT, 0);
  grn_highlighter_highlight(ctx,
                            highlighter,
                            GRN_TEXT_VALUE(input),
                            GRN_TEXT_LEN(input),
                            output);
  return output;
}
/*
 * highlight_html(string)
 *
 * Highlights args[0] through highlight_keywords(), with HTML escaping
 * enabled and each keyword wrapped in <span class="keyword">...</span>.
 * The keywords table is cached in the expression variable named
 * GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME; on a cache miss it is built by
 * func_highlight_html_create_keywords_table() and stored there.
 *
 * Returns the object produced by highlight_keywords().  When nargs is
 * not exactly 1, or highlight_keywords() yields NULL, a GRN_DB_VOID
 * object from grn_plugin_proc_alloc() is returned instead.
 */
static grn_obj *
func_highlight_html(grn_ctx *ctx, int nargs, grn_obj **args,
                    grn_user_data *user_data)
{
  const char *open_tag = "<span class=\"keyword\">";
  const char *close_tag = "</span>";
  grn_bool escape_html = GRN_TRUE;
  grn_obj *expression = NULL;
  grn_obj *result;
  grn_obj *cache;
  grn_obj *keywords;
  size_t cache_name_length;

#define N_REQUIRED_ARGS 1
  if (nargs != N_REQUIRED_ARGS) {
    return grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }
#undef N_REQUIRED_ARGS

  grn_proc_get_info(ctx, user_data, NULL, NULL, &expression);

  cache_name_length = strlen(GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME);
  cache = grn_expr_get_var(ctx, expression,
                           GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME,
                           cache_name_length);
  if (!cache) {
    /* Cache miss: create the variable, re-initialise it as an owning
     * pointer bulk and store a freshly built keywords table in it. */
    cache = grn_expr_get_or_add_var(ctx, expression,
                                    GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME,
                                    cache_name_length);
    GRN_OBJ_FIN(ctx, cache);
    GRN_PTR_INIT(cache, GRN_OBJ_OWN, GRN_DB_OBJECT);

    keywords = func_highlight_html_create_keywords_table(ctx, expression);
    GRN_PTR_SET(ctx, cache, keywords);
  } else {
    keywords = GRN_PTR_VALUE(cache);
  }

  result = highlight_keywords(ctx, user_data,
                              args[0], keywords, escape_html,
                              open_tag, strlen(open_tag),
                              close_tag, strlen(close_tag));
  if (!result) {
    result = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }
  return result;
}
/*
 * Builds the keywords table used by highlight_html().
 *
 * Creates a temporary (unnamed) GRN_OBJ_TABLE_PAT_KEY table keyed by
 * GRN_DB_SHORT_TEXT, assigns the "NormalizerAuto" normalizer to it and,
 * when the expression carries a condition in the variable named
 * GRN_SELECT_INTERNAL_VAR_CONDITION, adds every keyword reported by
 * grn_expr_get_keywords() for that condition as a key.
 *
 * Returns the new table, or NULL when grn_table_create() fails.  The
 * caller receives the table and is responsible for storing/releasing it.
 */
static grn_obj *
func_highlight_html_create_keywords_table(grn_ctx *ctx, grn_obj *expression)
{
  grn_obj *keywords;
  grn_obj *condition_ptr = NULL;
  grn_obj *condition = NULL;

  keywords = grn_table_create(ctx, NULL, 0, NULL,
                              GRN_OBJ_TABLE_PAT_KEY,
                              grn_ctx_at(ctx, GRN_DB_SHORT_TEXT),
                              NULL);
  if (!keywords) {
    /* Nothing to configure or fill; do not hand NULL to the table API. */
    return NULL;
  }

  {
    grn_obj *normalizer;
    normalizer = grn_ctx_get(ctx, "NormalizerAuto", -1);
    grn_obj_set_info(ctx, keywords, GRN_INFO_NORMALIZER, normalizer);
    grn_obj_unlink(ctx, normalizer);
  }

  condition_ptr = grn_expr_get_var(ctx, expression,
                                   GRN_SELECT_INTERNAL_VAR_CONDITION,
                                   strlen(GRN_SELECT_INTERNAL_VAR_CONDITION));
  if (condition_ptr) {
    condition = GRN_PTR_VALUE(condition_ptr);
  }

  if (condition) {
    size_t i, n_keywords;
    grn_obj current_keywords;

    /* The keywords are collected as a vector of grn_obj pointers. */
    GRN_PTR_INIT(&current_keywords, GRN_OBJ_VECTOR, GRN_ID_NIL);
    grn_expr_get_keywords(ctx, condition, &current_keywords);

    n_keywords = GRN_BULK_VSIZE(&current_keywords) / sizeof(grn_obj *);
    for (i = 0; i < n_keywords; i++) {
      grn_obj *keyword;
      keyword = GRN_PTR_VALUE_AT(&current_keywords, i);
      grn_table_add(ctx, keywords,
                    GRN_TEXT_VALUE(keyword),
                    GRN_TEXT_LEN(keyword),
                    NULL);
    }

    grn_obj_unlink(ctx, &current_keywords);
  }

  return keywords;
}
/*
 * Builds the highlighter used by highlight_html().
 *
 * Opens a new highlighter and, when the expression carries a condition
 * in the variable named GRN_SELECT_INTERNAL_VAR_CONDITION, registers
 * every keyword reported by grn_expr_get_keywords() for that condition.
 *
 * Returns the new highlighter, or NULL when grn_highlighter_open()
 * fails.  The caller receives the highlighter and is responsible for
 * storing/releasing it.
 */
static grn_highlighter *
func_highlight_html_create_highlighter(grn_ctx *ctx, grn_obj *expression)
{
  grn_highlighter *highlighter;
  grn_obj *condition_ptr = NULL;
  grn_obj *condition = NULL;

  highlighter = grn_highlighter_open(ctx);
  if (!highlighter) {
    /* Nothing to add keywords to; do not hand NULL to the highlighter API. */
    return NULL;
  }

  condition_ptr = grn_expr_get_var(ctx, expression,
                                   GRN_SELECT_INTERNAL_VAR_CONDITION,
                                   strlen(GRN_SELECT_INTERNAL_VAR_CONDITION));
  if (condition_ptr) {
    condition = GRN_PTR_VALUE(condition_ptr);
  }

  if (condition) {
    size_t i, n_keywords;
    grn_obj current_keywords;

    /* The keywords are collected as a text vector. */
    GRN_TEXT_INIT(&current_keywords, GRN_OBJ_VECTOR);
    grn_expr_get_keywords(ctx, condition, &current_keywords);

    n_keywords = grn_vector_size(ctx, &current_keywords);
    for (i = 0; i < n_keywords; i++) {
      const char *keyword;
      unsigned int keyword_size;
      keyword_size = grn_vector_get_element(ctx,
                                            &current_keywords,
                                            i,
                                            &keyword,
                                            NULL,
                                            NULL);
      grn_highlighter_add_keyword(ctx, highlighter, keyword, keyword_size);
    }

    GRN_OBJ_FIN(ctx, &current_keywords);
  }

  return highlighter;
}
/*
 * snippet_html(text)
 *
 * Runs snippet_exec() on args[0] with a snip object that is cached in
 * the expression variable named GRN_FUNC_SNIPPET_HTML_CACHE_NAME.  A
 * snip object is only looked up or created when the expression carries
 * a condition in the variable named GRN_SELECT_INTERNAL_VAR_CONDITION.
 * A new snip object is opened with GRN_SNIP_SKIP_LEADING_SPACES, width
 * 200, at most 3 results, <span class="keyword">...</span> tags and
 * GRN_SNIP_MAPPING_HTML_ESCAPE, then given GRN_NORMALIZER_AUTO and the
 * condition's keywords before being stored in the cache variable.
 *
 * Returns the object produced by snippet_exec().  When nargs is not 1,
 * no snip object is available, or snippet_exec() yields NULL, a
 * GRN_DB_VOID object from grn_plugin_proc_alloc() is returned instead.
 */
static grn_obj *
func_snippet_html(grn_ctx *ctx, int nargs, grn_obj **args,
                  grn_user_data *user_data)
{
  grn_obj *result = NULL;

  /* TODO: support parameters */
  if (nargs == 1) {
    const char *tag_open = "<span class=\"keyword\">";
    const char *tag_close = "</span>";
    int snip_flags = GRN_SNIP_SKIP_LEADING_SPACES;
    unsigned int snip_width = 200;
    unsigned int snip_max_results = 3;
    grn_snip_mapping *snip_mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;
    grn_obj *expression = NULL;
    grn_obj *condition_var;
    grn_obj *condition = NULL;
    grn_obj *snip = NULL;

    grn_proc_get_info(ctx, user_data, NULL, NULL, &expression);

    condition_var = grn_expr_get_var(ctx, expression,
                                     GRN_SELECT_INTERNAL_VAR_CONDITION,
                                     strlen(GRN_SELECT_INTERNAL_VAR_CONDITION));
    if (condition_var) {
      condition = GRN_PTR_VALUE(condition_var);
    }

    if (condition) {
      size_t cache_name_length = strlen(GRN_FUNC_SNIPPET_HTML_CACHE_NAME);
      grn_obj *cache = grn_expr_get_var(ctx, expression,
                                        GRN_FUNC_SNIPPET_HTML_CACHE_NAME,
                                        cache_name_length);
      if (!cache) {
        /* Cache miss: create the variable and re-initialise it as an
         * owning pointer bulk before trying to open the snip object. */
        cache = grn_expr_get_or_add_var(ctx, expression,
                                        GRN_FUNC_SNIPPET_HTML_CACHE_NAME,
                                        cache_name_length);
        GRN_OBJ_FIN(ctx, cache);
        GRN_PTR_INIT(cache, GRN_OBJ_OWN, GRN_DB_OBJECT);

        snip = grn_snip_open(ctx, snip_flags, snip_width, snip_max_results,
                             tag_open, strlen(tag_open),
                             tag_close, strlen(tag_close),
                             snip_mapping);
        if (snip) {
          grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO);
          grn_expr_snip_add_conditions(ctx, condition, snip,
                                       0, NULL, NULL, NULL, NULL);
          GRN_PTR_SET(ctx, cache, snip);
        }
      } else {
        snip = GRN_PTR_VALUE(cache);
      }
    }

    if (snip) {
      result = snippet_exec(ctx, snip, args[0], user_data, NULL, 0, NULL, 0);
    }
  }

  if (!result) {
    result = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }
  return result;
}
static grn_rc selector_fuzzy_search(grn_ctx *ctx, grn_obj *table, grn_obj *index, int nargs, grn_obj **args, grn_obj *res, grn_operator op) { grn_rc rc = GRN_SUCCESS; grn_obj *target = NULL; grn_obj *obj; grn_obj *query; grn_obj *hash_args_ptr; uint32_t max_distance = 1; uint32_t prefix_length = 0; uint32_t prefix_match_size = 0; uint32_t max_expansion = 0; int flags = 0; grn_bool use_sequential_search = GRN_FALSE; if ((nargs - 1) < 2) { GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "fuzzy_search(): wrong number of arguments (%d ...)", nargs - 1); rc = ctx->rc; goto exit; } obj = args[1]; query = args[2]; if (nargs == 4) { grn_obj *hash; grn_hash_cursor *cursor; void *key; grn_obj *value; int key_size, value_size; hash_args_ptr = args[3]; if (hash_args_ptr->header.type == GRN_PTR) { hash = GRN_PTR_VALUE(hash_args_ptr); } if (hash->header.type != GRN_TABLE_HASH_KEY) { GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "fuzzy_search(): 3rd argument must be object literal: <%.*s>", (int)GRN_TEXT_LEN(args[3]), GRN_TEXT_VALUE(args[3])); goto exit; } hash = GRN_PTR_VALUE(hash_args_ptr); if (!(cursor = grn_hash_cursor_open(ctx, (grn_hash *)hash, NULL, 0, NULL, 0, 0, -1, 0))) { GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE, "fuzzy_search(): couldn't open cursor"); goto exit; } while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) { value_size = grn_hash_cursor_get_key_value(ctx, cursor, &key, &key_size, (void **)&value); if (key_size == 12 && !memcmp(key, "max_distance", 12)) { max_distance = GRN_UINT32_VALUE(value); } else if (key_size == 13 && !memcmp(key, "prefix_length", 13)) { prefix_length = GRN_UINT32_VALUE(value); } else if (key_size == 13 && !memcmp(key, "max_expansion", 13)) { max_expansion = GRN_UINT32_VALUE(value); } else if (key_size == 18 && !memcmp(key, "with_transposition", 18)) { if (GRN_BOOL_VALUE(value)) { flags |= GRN_TABLE_FUZZY_SEARCH_WITH_TRANSPOSITION; } } else { GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "invalid option name: %.*s", key_size, (char 
*)key); grn_hash_cursor_close(ctx, cursor); goto exit; } } grn_hash_cursor_close(ctx, cursor); } if (index) { target = index; } else { if (obj->header.type == GRN_COLUMN_INDEX) { target = obj; } else { grn_column_index(ctx, obj, GRN_OP_FUZZY, &target, 1, NULL); } } if (target) { grn_obj *lexicon; use_sequential_search = GRN_TRUE; lexicon = grn_ctx_at(ctx, target->header.domain); if (lexicon) { if (lexicon->header.type == GRN_TABLE_PAT_KEY) { use_sequential_search = GRN_FALSE; } grn_obj_unlink(ctx, lexicon); } } else { if (grn_obj_is_key_accessor(ctx, obj) && table->header.type == GRN_TABLE_PAT_KEY) { target = table; } else { use_sequential_search = GRN_TRUE; } } if (prefix_length) { const char *s = GRN_TEXT_VALUE(query); const char *e = GRN_BULK_CURR(query); const char *p; unsigned int cl = 0; unsigned int length = 0; for (p = s; p < e && (cl = grn_charlen(ctx, p, e)); p += cl) { length++; if (length > prefix_length) { break; } } prefix_match_size = p - s; } if (use_sequential_search) { rc = sequential_fuzzy_search(ctx, table, obj, query, max_distance, prefix_match_size, max_expansion, flags, res, op); goto exit; } if (!target) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, target); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "fuzzy_search(): " "column must be COLUMN_INDEX or TABLE_PAT_KEY: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); rc = ctx->rc; GRN_OBJ_FIN(ctx, &inspected); } else { grn_search_optarg options = {0}; options.mode = GRN_OP_FUZZY; options.fuzzy.prefix_match_size = prefix_match_size; options.fuzzy.max_distance = max_distance; options.fuzzy.max_expansion = max_expansion; options.fuzzy.flags = flags; grn_obj_search(ctx, target, query, res, op, &options); } exit : return rc; }