/*
 * Runs `snip` over `text` and collects the generated snippets.
 *
 * Every collected element is laid out as <prefix><snippet><suffix> in a
 * scratch buffer and then added to the returned vector.
 *
 * Returns:
 *   - NULL when `text` is empty, when grn_snip_exec() doesn't succeed or
 *     when the result vector can't be allocated.
 *   - A GRN_DB_VOID object when grn_snip_exec() reports no results.
 *   - A GRN_DB_SHORT_TEXT vector otherwise.
 */
static grn_obj *
snippet_exec(grn_ctx *ctx, grn_obj *snip, grn_obj *text,
             grn_user_data *user_data,
             const char *prefix, int prefix_length,
             const char *suffix, int suffix_length)
{
  unsigned int n_found;
  unsigned int longest_tagged_length;
  unsigned int nth;
  grn_obj work;
  grn_obj *collected;

  if (GRN_TEXT_LEN(text) == 0) {
    return NULL;
  }

  if (grn_snip_exec(ctx, snip,
                    GRN_TEXT_VALUE(text), GRN_TEXT_LEN(text),
                    &n_found, &longest_tagged_length) != GRN_SUCCESS) {
    return NULL;
  }

  if (n_found == 0) {
    return grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  collected = grn_plugin_proc_alloc(ctx, user_data,
                                    GRN_DB_SHORT_TEXT, GRN_OBJ_VECTOR);
  if (!collected) {
    return NULL;
  }

  /* Reserve room once for the largest possible decorated snippet. */
  GRN_TEXT_INIT(&work, 0);
  grn_bulk_space(ctx, &work,
                 prefix_length + longest_tagged_length + suffix_length);

  for (nth = 0; nth < n_found; nth++) {
    unsigned int body_length;

    GRN_BULK_REWIND(&work);
    if (prefix_length) {
      GRN_TEXT_PUT(ctx, &work, prefix, prefix_length);
    }

    /* The snippet body is written right after the prefix. */
    if (grn_snip_get_result(ctx, snip, nth,
                            GRN_TEXT_VALUE(&work) + prefix_length,
                            &body_length) != GRN_SUCCESS) {
      continue;
    }

    grn_strncat(GRN_TEXT_VALUE(&work),
                GRN_BULK_WSIZE(&work),
                suffix,
                suffix_length);
    grn_vector_add_element(ctx, collected,
                           GRN_TEXT_VALUE(&work),
                           prefix_length + body_length + suffix_length,
                           0, GRN_DB_SHORT_TEXT);
  }

  GRN_OBJ_FIN(ctx, &work);

  return collected;
}
/*
 * highlight_html(string[, lexicon])
 *
 * Highlights `string` with a grn_highlighter that is cached in the
 * expression variable named GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME. The
 * highlighter is created (and the lexicon is applied to it) only when the
 * variable doesn't exist yet.
 *
 * Returns a GRN_DB_TEXT object that receives the highlighted text, or a
 * GRN_DB_VOID object after reporting GRN_INVALID_ARGUMENT when the number
 * of arguments isn't 1 or 2.
 */
static grn_obj *
func_highlight_html(grn_ctx *ctx, int nargs, grn_obj **args,
                    grn_user_data *user_data)
{
  const char *cache_name = GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME;
  grn_obj *target;
  grn_obj *lexicon;
  grn_obj *expression = NULL;
  grn_obj *cache;
  grn_highlighter *highlighter;
  grn_obj *output;

  if (nargs < 1 || nargs > 2) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "highlight_html(): wrong number of arguments (%d for 1..2)",
                     nargs);
    return grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  target = args[0];
  lexicon = (nargs == 2) ? args[1] : NULL;

  grn_proc_get_info(ctx, user_data, NULL, NULL, &expression);

  cache = grn_expr_get_var(ctx, expression, cache_name, strlen(cache_name));
  if (!cache) {
    /* First call for this expression: build the highlighter and let the
     * variable own it. */
    cache = grn_expr_get_or_add_var(ctx, expression,
                                    cache_name, strlen(cache_name));
    GRN_OBJ_FIN(ctx, cache);
    GRN_PTR_INIT(cache, GRN_OBJ_OWN, GRN_DB_OBJECT);

    highlighter = func_highlight_html_create_highlighter(ctx, expression);
    grn_highlighter_set_lexicon(ctx, highlighter, lexicon);
    GRN_PTR_SET(ctx, cache, highlighter);
  } else {
    highlighter = (grn_highlighter *)GRN_PTR_VALUE(cache);
  }

  output = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_TEXT, 0);
  grn_highlighter_highlight(ctx, highlighter,
                            GRN_TEXT_VALUE(target), GRN_TEXT_LEN(target),
                            output);

  return output;
}
/*
 * Tags every occurrence of a key of `keywords` in `string` with a single
 * open/close tag pair and returns the result as a GRN_DB_TEXT object
 * allocated with grn_plugin_proc_alloc().
 *
 * The same tag pair is used for all keywords: grn_pat_tag_keys() is given
 * exactly one tag set.
 */
static grn_obj *
highlight_keywords(grn_ctx *ctx, grn_user_data *user_data,
                   grn_obj *string, grn_obj *keywords,
                   grn_bool use_html_escape,
                   const char *default_open_tag,
                   unsigned int default_open_tag_length,
                   const char *default_close_tag,
                   unsigned int default_close_tag_length)
{
  const unsigned int n_tag_sets = 1;
  grn_obj *output;

  output = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_TEXT, 0);

  /* With a single tag set, the parameters themselves serve as the
   * one-element tag/length arrays. */
  grn_pat_tag_keys(ctx, keywords,
                   GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string),
                   &default_open_tag,
                   &default_open_tag_length,
                   &default_close_tag,
                   &default_close_tag_length,
                   n_tag_sets,
                   output,
                   use_html_escape);

  return output;
}
/*
 * Query expander backed by the `synonyms` hash table.
 *
 * args[0] is the term to look up and args[1] is the text object that
 * receives the expansion. When the term is registered in `synonyms`, its
 * value is appended to args[1] as a NUL-terminated string.
 *
 * Returns a GRN_DB_INT32 object holding GRN_SUCCESS when the term was
 * found and GRN_END_OF_DATA otherwise. The returned pointer is whatever
 * grn_plugin_proc_alloc() produced, so it may be NULL.
 */
static grn_obj *
func_query_expander_tsv(grn_ctx *ctx, int nargs, grn_obj **args,
                        grn_user_data *user_data)
{
  grn_obj *term = args[0];
  grn_obj *expanded_term = args[1];
  void *synonym = NULL;
  grn_rc status;
  grn_obj *status_object;

  if (grn_hash_get(ctx, synonyms,
                   GRN_TEXT_VALUE(term), GRN_TEXT_LEN(term),
                   &synonym) == GRN_ID_NIL) {
    status = GRN_END_OF_DATA;
  } else {
    const char *query = synonym;
    GRN_TEXT_PUTS(ctx, expanded_term, query);
    status = GRN_SUCCESS;
  }

  status_object = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_INT32, 0);
  if (status_object) {
    GRN_INT32_SET(ctx, status_object, status);
  }

  return status_object;
}
/*
 * snippet_tritonn(text, width, max_n_results, normalizer_name,
 *                 skip_leading_spaces, html_escape, args[6], args[7],
 *                 keyword1, open_tag1, close_tag1, ...)
 *
 * Builds a snip object from the positional arguments, registers one
 * condition per complete (keyword, open_tag, close_tag) triple starting at
 * args[8], and runs it over `text` via snippet_exec().
 *
 * At least 11 arguments are required. Trailing arguments that do not form
 * a complete triple are ignored.
 *
 * Returns what snippet_exec() returned, or a GRN_DB_VOID object when there
 * are too few arguments, the snip object can't be opened, or
 * snippet_exec() returned NULL.
 */
static grn_obj *
func_snippet_tritonn(grn_ctx *ctx, int nargs, grn_obj **args,
                     grn_user_data *user_data)
{
  grn_obj *snippets = NULL;

  if (nargs > 10) {
    grn_obj *text = args[0];
    grn_obj *snip = NULL;
    unsigned int width = GRN_UINT64_VALUE(args[1]);
    unsigned int max_n_results = GRN_UINT64_VALUE(args[2]);
    grn_snip_mapping *mapping = NULL;
    int flags = GRN_SNIP_COPY_TAG;

    if (GRN_UINT64_VALUE(args[4])) {
      flags |= GRN_SNIP_SKIP_LEADING_SPACES;
    }
    if (GRN_UINT64_VALUE(args[5])) {
      mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;
    }

    snip = grn_snip_open(ctx, flags, width, max_n_results,
                         "", 0, "", 0, mapping);
    if (snip) {
      const unsigned int n_args = (unsigned int)nargs;
      unsigned int i;

      if (GRN_TEXT_LEN(args[3])) {
        grn_obj *normalizer;
        normalizer = grn_ctx_get(ctx,
                                 GRN_TEXT_VALUE(args[3]),
                                 GRN_TEXT_LEN(args[3]));
        grn_snip_set_normalizer(ctx, snip, normalizer);
      }

      /* Only consume complete triples: requiring i + 2 < n_args keeps
       * args[i + 1] and args[i + 2] inside the argument array. */
      for (i = 8; i + 2 < n_args; i += 3) {
        grn_snip_add_cond(ctx, snip,
                          GRN_TEXT_VALUE(args[i]),
                          GRN_TEXT_LEN(args[i]),
                          GRN_TEXT_VALUE(args[i + 1]),
                          GRN_TEXT_LEN(args[i + 1]),
                          GRN_TEXT_VALUE(args[i + 2]),
                          GRN_TEXT_LEN(args[i + 2]));
      }

      /* snippet_exec() takes the prefix and suffix explicitly. args[6] and
       * args[7] are the only slots this function doesn't consume itself.
       * NOTE(review): presumably they are the snippet prefix and suffix, in
       * that order -- confirm against the callers of snippet_tritonn(). */
      snippets = snippet_exec(ctx, snip, text, user_data,
                              GRN_TEXT_VALUE(args[6]),
                              GRN_TEXT_LEN(args[6]),
                              GRN_TEXT_VALUE(args[7]),
                              GRN_TEXT_LEN(args[7]));
      grn_obj_close(ctx, snip);
    }
  }

  if (!snippets) {
    snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return snippets;
}
/*
 * highlight_html(string)
 *
 * Highlights `string` with <span class="keyword">...</span> tags and HTML
 * escaping enabled. The keywords table is cached in the expression
 * variable named GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME and is created only
 * when that variable doesn't exist yet.
 *
 * Returns the object produced by highlight_keywords(), or a GRN_DB_VOID
 * object when the number of arguments isn't exactly 1 or when
 * highlight_keywords() returned NULL.
 */
static grn_obj *
func_highlight_html(grn_ctx *ctx, int nargs, grn_obj **args,
                    grn_user_data *user_data)
{
  grn_obj *result = NULL;

#define N_REQUIRED_ARGS 1
  if (nargs == N_REQUIRED_ARGS) {
    const char *open_tag = "<span class=\"keyword\">";
    const char *close_tag = "</span>";
    grn_obj *expression = NULL;
    grn_obj *cache;
    grn_obj *keywords;

    grn_proc_get_info(ctx, user_data, NULL, NULL, &expression);

    cache = grn_expr_get_var(ctx, expression,
                             GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME,
                             strlen(GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME));
    if (!cache) {
      /* No cached table yet: create it and let the variable own it. */
      cache = grn_expr_get_or_add_var(ctx, expression,
                                      GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME,
                                      strlen(GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME));
      GRN_OBJ_FIN(ctx, cache);
      GRN_PTR_INIT(cache, GRN_OBJ_OWN, GRN_DB_OBJECT);

      keywords = func_highlight_html_create_keywords_table(ctx, expression);
      GRN_PTR_SET(ctx, cache, keywords);
    } else {
      keywords = GRN_PTR_VALUE(cache);
    }

    result = highlight_keywords(ctx, user_data,
                                args[0], keywords, GRN_TRUE,
                                open_tag, strlen(open_tag),
                                close_tag, strlen(close_tag));
  }
#undef N_REQUIRED_ARGS

  if (!result) {
    result = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return result;
}
/*
 * vector_size(vector)
 *
 * Returns a GRN_DB_UINT32 object holding the number of elements of the
 * given GRN_VECTOR, GRN_PVECTOR or GRN_UVECTOR.
 *
 * Returns NULL after reporting GRN_INVALID_ARGUMENT when the number of
 * arguments isn't 1 or when the argument isn't a vector, and NULL when the
 * result object can't be allocated.
 */
static grn_obj *
func_vector_size(grn_ctx *ctx, int n_args, grn_obj **args,
                 grn_user_data *user_data)
{
  grn_obj *target;
  unsigned int size;
  grn_obj *grn_size;

  if (n_args != 1) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "vector_size(): wrong number of arguments (%d for 1)",
                     n_args);
    return NULL;
  }

  target = args[0];
  switch (target->header.type) {
  case GRN_VECTOR :
  case GRN_PVECTOR :
  case GRN_UVECTOR :
    size = grn_vector_size(ctx, target);
    break;
  default :
    {
      grn_obj inspected;

      GRN_TEXT_INIT(&inspected, 0);
      /* The output buffer comes first and the inspected object second.
       * Passing them the other way round would write into the caller's
       * argument and leave the message below empty. */
      grn_inspect(ctx, &inspected, target);
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "vector_size(): target object must be vector: <%.*s>",
                       (int)GRN_TEXT_LEN(&inspected),
                       GRN_TEXT_VALUE(&inspected));
      GRN_OBJ_FIN(ctx, &inspected);
      return NULL;
    }
  }

  grn_size = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_UINT32, 0);
  if (!grn_size) {
    return NULL;
  }

  GRN_UINT32_SET(ctx, grn_size, size);

  return grn_size;
}
/*
 * edit_distance(string1, string2[, with_transposition])
 *
 * Computes the edit distance between the first two arguments with
 * calc_edit_distance(). When a third argument is given and is true,
 * GRN_TABLE_FUZZY_SEARCH_WITH_TRANSPOSITION is passed as a flag.
 *
 * Returns a GRN_DB_UINT32 object holding the distance. The distance stays
 * 0 when the number of arguments is outside 2..3. The returned pointer is
 * whatever grn_plugin_proc_alloc() produced, so it may be NULL.
 */
static grn_obj *
func_edit_distance(grn_ctx *ctx, int nargs, grn_obj **args,
                   grn_user_data *user_data)
{
#define N_REQUIRED_ARGS 2
#define MAX_ARGS 3
  int distance = 0;
  grn_obj *result;

  if (N_REQUIRED_ARGS <= nargs && nargs <= MAX_ARGS) {
    grn_obj *left = args[0];
    grn_obj *right = args[1];
    int flags = 0;

    if (nargs == MAX_ARGS && GRN_BOOL_VALUE(args[2])) {
      flags |= GRN_TABLE_FUZZY_SEARCH_WITH_TRANSPOSITION;
    }

    distance = calc_edit_distance(ctx,
                                  GRN_TEXT_VALUE(left), GRN_BULK_CURR(left),
                                  GRN_TEXT_VALUE(right), GRN_BULK_CURR(right),
                                  flags);
  }

  result = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_UINT32, 0);
  if (result) {
    GRN_UINT32_SET(ctx, result, distance);
  }

  return result;
#undef N_REQUIRED_ARGS
#undef MAX_ARGS
}
/*
 * highlight_full(string, normalizer_name, use_html_escape,
 *                keyword1, open_tag1, close_tag1, ...)
 *
 * Requires the three leading arguments plus at least one complete
 * (keyword, open_tag, close_tag) triple, and the trailing arguments must
 * be a whole number of triples. The keywords table is created for this
 * call only and unlinked once highlighting is done.
 *
 * Returns the object produced by highlight_keyword_sets(), or a
 * GRN_DB_VOID object when the argument count is unacceptable, the
 * keywords table can't be created or no highlighted object was produced.
 */
static grn_obj *
func_highlight_full(grn_ctx *ctx, int nargs, grn_obj **args,
                    grn_user_data *user_data)
{
  grn_obj *result = NULL;

#define N_REQUIRED_ARGS 3
#define KEYWORD_SET_SIZE 3
  if (nargs >= (N_REQUIRED_ARGS + KEYWORD_SET_SIZE) &&
      ((nargs - N_REQUIRED_ARGS) % KEYWORD_SET_SIZE) == 0) {
    grn_obj *target = args[0];
    grn_obj *normalizer_arg = args[1];
    grn_bool escape_html = GRN_BOOL_VALUE(args[2]);
    grn_obj *keywords_table;

    keywords_table =
      func_highlight_create_keywords_table(ctx, user_data,
                                           GRN_TEXT_VALUE(normalizer_arg),
                                           GRN_TEXT_LEN(normalizer_arg));
    if (keywords_table) {
      result = highlight_keyword_sets(ctx, user_data,
                                      args + N_REQUIRED_ARGS,
                                      nargs - N_REQUIRED_ARGS,
                                      target, keywords_table, escape_html);
      grn_obj_unlink(ctx, keywords_table);
    }
  }

  if (!result) {
    result = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }
#undef KEYWORD_SET_SIZE
#undef N_REQUIRED_ARGS

  return result;
}
static grn_obj * func_time_classify_raw(grn_ctx *ctx, int n_args, grn_obj **args, grn_user_data *user_data, const char *function_name, grn_time_classify_unit unit) { grn_obj *time; uint32_t interval_raw = 1; grn_obj *classed_time; grn_bool accept_interval = GRN_TRUE; switch (unit) { case GRN_TIME_CLASSIFY_UNIT_SECOND : case GRN_TIME_CLASSIFY_UNIT_MINUTE : case GRN_TIME_CLASSIFY_UNIT_HOUR : accept_interval = GRN_TRUE; break; case GRN_TIME_CLASSIFY_UNIT_DAY : case GRN_TIME_CLASSIFY_UNIT_WEEK : accept_interval = GRN_FALSE; break; case GRN_TIME_CLASSIFY_UNIT_MONTH : case GRN_TIME_CLASSIFY_UNIT_YEAR : accept_interval = GRN_TRUE; break; } if (accept_interval) { if (!(n_args == 1 || n_args == 2)) { GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "%s(): " "wrong number of arguments (%d for 1..2)", function_name, n_args); return NULL; } } else { if (n_args != 1) { GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "%s(): " "wrong number of arguments (%d for 1)", function_name, n_args); return NULL; } } time = args[0]; if (!(time->header.type == GRN_BULK && time->header.domain == GRN_DB_TIME)) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, time); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "%s(): " "the first argument must be a time: " "<%.*s>", function_name, (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } if (n_args == 2) { grn_obj *interval; grn_obj casted_interval; interval = args[1]; if (!(interval->header.type == GRN_BULK && grn_type_id_is_number_family(ctx, interval->header.domain))) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, interval); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "%s(): " "the second argument must be a number: " "<%.*s>", function_name, (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } GRN_VALUE_FIX_SIZE_INIT(&casted_interval, 0, GRN_DB_UINT32); grn_obj_cast(ctx, interval, 
&casted_interval, GRN_FALSE); interval_raw = GRN_UINT32_VALUE(&casted_interval); GRN_OBJ_FIN(ctx, &casted_interval); } { int64_t time_raw; struct tm tm; int64_t classed_time_raw; time_raw = GRN_TIME_VALUE(time); if (!grn_time_to_tm(ctx, time_raw, &tm)) { return NULL; } switch (unit) { case GRN_TIME_CLASSIFY_UNIT_SECOND : tm.tm_sec = (tm.tm_sec / interval_raw) * interval_raw; break; case GRN_TIME_CLASSIFY_UNIT_MINUTE : tm.tm_min = (tm.tm_min / interval_raw) * interval_raw; tm.tm_sec = 0; break; case GRN_TIME_CLASSIFY_UNIT_HOUR : tm.tm_hour = (tm.tm_hour / interval_raw) * interval_raw; tm.tm_min = 0; tm.tm_sec = 0; break; case GRN_TIME_CLASSIFY_UNIT_DAY : tm.tm_hour = 0; tm.tm_min = 0; tm.tm_sec = 0; break; case GRN_TIME_CLASSIFY_UNIT_WEEK : if ((tm.tm_mday - tm.tm_wday) >= 0) { tm.tm_mday -= tm.tm_wday; } else { int n_underflowed_mday = -(tm.tm_mday - tm.tm_wday); int mday; int max_mday = 31; if (tm.tm_mon == 0) { tm.tm_year--; tm.tm_mon = 11; } else { tm.tm_mon--; } for (mday = max_mday; mday > n_underflowed_mday; mday--) { int64_t unused; tm.tm_mday = mday; if (grn_time_from_tm(ctx, &unused, &tm)) { break; } } tm.tm_mday -= n_underflowed_mday; } tm.tm_hour = 0; tm.tm_min = 0; tm.tm_sec = 0; break; case GRN_TIME_CLASSIFY_UNIT_MONTH : tm.tm_mon = (tm.tm_mon / interval_raw) * interval_raw; tm.tm_mday = 1; tm.tm_hour = 0; tm.tm_min = 0; tm.tm_sec = 0; break; case GRN_TIME_CLASSIFY_UNIT_YEAR : tm.tm_year = (((1900 + tm.tm_year) / interval_raw) * interval_raw) - 1900; tm.tm_mon = 0; tm.tm_mday = 1; tm.tm_hour = 0; tm.tm_min = 0; tm.tm_sec = 0; break; } if (!grn_time_from_tm(ctx, &classed_time_raw, &tm)) { return NULL; } classed_time = grn_plugin_proc_alloc(ctx, user_data, time->header.domain, 0); if (!classed_time) { return NULL; } GRN_TIME_SET(ctx, classed_time, classed_time_raw); return classed_time; } }
/* TODO: support caching for the same parameter. */
/*
 * snippet(text, keyword..., [options])
 *
 * args[0] is the text to extract snippets from. When the last argument is
 * a GRN_TABLE_HASH_KEY it is treated as an options table; the recognized
 * keys are "width", "max_n_results", "skip_leading_spaces",
 * "html_escape", "prefix", "suffix", "normalizer", "default_open_tag" and
 * "default_close_tag". Any other key is reported as GRN_INVALID_ARGUMENT.
 *
 * When neither default tag has a length, the remaining arguments are read
 * as (keyword, open_tag, close_tag) triples. Otherwise every remaining
 * argument is a keyword that uses the default tags.
 *
 * The snip object is created for this call only and is closed before
 * returning, on both the success path and the invalid-normalizer path.
 *
 * Returns what snippet_exec() returned, or a GRN_DB_VOID object when
 * there are too few arguments, an error was reported, the snip object
 * couldn't be opened or snippet_exec() returned NULL.
 */
static grn_obj *
func_snippet(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *snippets = NULL;

#define N_REQUIRED_ARGS 1
#define KEYWORD_SET_SIZE 3
  if (nargs > N_REQUIRED_ARGS) {
    grn_obj *text = args[0];
    grn_obj *end_arg = args[nargs - 1];
    grn_obj *snip = NULL;
    unsigned int width = 200;
    unsigned int max_n_results = 3;
    grn_snip_mapping *mapping = NULL;
    int flags = GRN_SNIP_SKIP_LEADING_SPACES;
    const char *prefix = NULL;
    int prefix_length = 0;
    const char *suffix = NULL;
    int suffix_length = 0;
    const char *normalizer_name = NULL;
    int normalizer_name_length = 0;
    const char *default_open_tag = NULL;
    int default_open_tag_length = 0;
    const char *default_close_tag = NULL;
    int default_close_tag_length = 0;
    int n_args_without_option = nargs;

    if (end_arg->header.type == GRN_TABLE_HASH_KEY) {
      grn_obj *options = end_arg;
      grn_hash_cursor *cursor;
      void *key;
      int key_size;
      grn_obj *value;

      n_args_without_option--;
      cursor = grn_hash_cursor_open(ctx, (grn_hash *)options,
                                    NULL, 0, NULL, 0,
                                    0, -1, 0);
      if (!cursor) {
        GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                         "snippet(): couldn't open cursor");
        goto exit;
      }
      while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) {
        grn_hash_cursor_get_key_value(ctx, cursor,
                                      &key, &key_size,
                                      (void **)&value);
        if (key_size == 5 && !memcmp(key, "width", 5)) {
          width = GRN_UINT32_VALUE(value);
        } else if (key_size == 13 && !memcmp(key, "max_n_results", 13)) {
          max_n_results = GRN_UINT32_VALUE(value);
        } else if (key_size == 19 && !memcmp(key, "skip_leading_spaces", 19)) {
          if (GRN_BOOL_VALUE(value) == GRN_FALSE) {
            flags &= ~GRN_SNIP_SKIP_LEADING_SPACES;
          }
        } else if (key_size == 11 && !memcmp(key, "html_escape", 11)) {
          if (GRN_BOOL_VALUE(value)) {
            mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;
          }
        } else if (key_size == 6 && !memcmp(key, "prefix", 6)) {
          prefix = GRN_TEXT_VALUE(value);
          prefix_length = GRN_TEXT_LEN(value);
        } else if (key_size == 6 && !memcmp(key, "suffix", 6)) {
          suffix = GRN_TEXT_VALUE(value);
          suffix_length = GRN_TEXT_LEN(value);
        } else if (key_size == 10 && !memcmp(key, "normalizer", 10)) {
          normalizer_name = GRN_TEXT_VALUE(value);
          normalizer_name_length = GRN_TEXT_LEN(value);
        } else if (key_size == 16 && !memcmp(key, "default_open_tag", 16)) {
          default_open_tag = GRN_TEXT_VALUE(value);
          default_open_tag_length = GRN_TEXT_LEN(value);
        } else if (key_size == 17 && !memcmp(key, "default_close_tag", 17)) {
          default_close_tag = GRN_TEXT_VALUE(value);
          default_close_tag_length = GRN_TEXT_LEN(value);
        } else {
          GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                           "invalid option name: <%.*s>",
                           key_size, (char *)key);
          grn_hash_cursor_close(ctx, cursor);
          goto exit;
        }
      }
      grn_hash_cursor_close(ctx, cursor);
    }

    snip = grn_snip_open(ctx, flags, width, max_n_results,
                         default_open_tag, default_open_tag_length,
                         default_close_tag, default_close_tag_length,
                         mapping);
    if (snip) {
      grn_bool is_valid_normalizer = GRN_TRUE;
      unsigned int i;

      if (!normalizer_name) {
        /* No "normalizer" option at all: use the automatic one. */
        grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO);
      } else if (normalizer_name_length > 0) {
        grn_obj *normalizer;

        normalizer = grn_ctx_get(ctx, normalizer_name, normalizer_name_length);
        if (grn_obj_is_normalizer_proc(ctx, normalizer)) {
          grn_snip_set_normalizer(ctx, snip, normalizer);
        } else {
          grn_obj inspected;

          GRN_TEXT_INIT(&inspected, 0);
          grn_inspect(ctx, &inspected, normalizer);
          GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                           "snippet(): not normalizer: <%.*s>",
                           (int)GRN_TEXT_LEN(&inspected),
                           GRN_TEXT_VALUE(&inspected));
          GRN_OBJ_FIN(ctx, &inspected);
          is_valid_normalizer = GRN_FALSE;
        }
        grn_obj_unlink(ctx, normalizer);
      }

      if (is_valid_normalizer) {
        if (default_open_tag_length == 0 && default_close_tag_length == 0) {
          unsigned int n_keyword_sets =
            (n_args_without_option - N_REQUIRED_ARGS) / KEYWORD_SET_SIZE;
          grn_obj **keyword_set_args = args + N_REQUIRED_ARGS;

          for (i = 0; i < n_keyword_sets; i++) {
            grn_obj *keyword = keyword_set_args[i * KEYWORD_SET_SIZE];
            grn_obj *open_tag = keyword_set_args[i * KEYWORD_SET_SIZE + 1];
            grn_obj *close_tag = keyword_set_args[i * KEYWORD_SET_SIZE + 2];

            grn_snip_add_cond(ctx, snip,
                              GRN_TEXT_VALUE(keyword),
                              GRN_TEXT_LEN(keyword),
                              GRN_TEXT_VALUE(open_tag),
                              GRN_TEXT_LEN(open_tag),
                              GRN_TEXT_VALUE(close_tag),
                              GRN_TEXT_LEN(close_tag));
          }
        } else {
          unsigned int n_keywords = n_args_without_option - N_REQUIRED_ARGS;
          grn_obj **keyword_args = args + N_REQUIRED_ARGS;

          for (i = 0; i < n_keywords; i++) {
            grn_snip_add_cond(ctx, snip,
                              GRN_TEXT_VALUE(keyword_args[i]),
                              GRN_TEXT_LEN(keyword_args[i]),
                              NULL, 0,
                              NULL, 0);
          }
        }

        snippets = snippet_exec(ctx, snip, text, user_data,
                                prefix, prefix_length,
                                suffix, suffix_length);
      }

      /* The snip object isn't cached anywhere, so release it here on
       * every path that opened it. */
      grn_obj_close(ctx, snip);
    }
  }
#undef KEYWORD_SET_SIZE
#undef N_REQUIRED_ARGS

exit :
  if (!snippets) {
    snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return snippets;
}
/*
 * snippet_html(text)
 *
 * Extracts snippets from `text` using the condition stored in the
 * expression variable GRN_SELECT_INTERNAL_VAR_CONDITION. Keywords are
 * wrapped in <span class="keyword">...</span> and HTML escape mapping is
 * enabled. The snip object is cached in the expression variable
 * GRN_FUNC_SNIPPET_HTML_CACHE_NAME and is built only when that variable
 * doesn't exist yet.
 *
 * Returns what snippet_exec() returned, or a GRN_DB_VOID object when the
 * number of arguments isn't 1, no condition or snip object is available,
 * or snippet_exec() returned NULL.
 */
static grn_obj *
func_snippet_html(grn_ctx *ctx, int nargs, grn_obj **args,
                  grn_user_data *user_data)
{
  grn_obj *result = NULL;
  grn_obj *expression = NULL;
  grn_obj *condition_var;
  grn_obj *condition = NULL;
  grn_obj *snip = NULL;

  /* TODO: support parameters */
  if (nargs != 1) {
    return grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  grn_proc_get_info(ctx, user_data, NULL, NULL, &expression);

  condition_var = grn_expr_get_var(ctx, expression,
                                   GRN_SELECT_INTERNAL_VAR_CONDITION,
                                   strlen(GRN_SELECT_INTERNAL_VAR_CONDITION));
  if (condition_var) {
    condition = GRN_PTR_VALUE(condition_var);
  }

  if (condition) {
    grn_obj *cache;

    cache = grn_expr_get_var(ctx, expression,
                             GRN_FUNC_SNIPPET_HTML_CACHE_NAME,
                             strlen(GRN_FUNC_SNIPPET_HTML_CACHE_NAME));
    if (!cache) {
      const int flags = GRN_SNIP_SKIP_LEADING_SPACES;
      const unsigned int width = 200;
      const unsigned int max_n_results = 3;
      const char *open_tag = "<span class=\"keyword\">";
      const char *close_tag = "</span>";
      grn_snip_mapping *mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;

      /* Prepare the owning variable first, then fill it only when the
       * snip object could actually be opened. */
      cache = grn_expr_get_or_add_var(ctx, expression,
                                      GRN_FUNC_SNIPPET_HTML_CACHE_NAME,
                                      strlen(GRN_FUNC_SNIPPET_HTML_CACHE_NAME));
      GRN_OBJ_FIN(ctx, cache);
      GRN_PTR_INIT(cache, GRN_OBJ_OWN, GRN_DB_OBJECT);

      snip = grn_snip_open(ctx, flags, width, max_n_results,
                           open_tag, strlen(open_tag),
                           close_tag, strlen(close_tag),
                           mapping);
      if (snip) {
        grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO);
        grn_expr_snip_add_conditions(ctx, condition, snip,
                                     0, NULL, NULL, NULL, NULL);
        GRN_PTR_SET(ctx, cache, snip);
      }
    } else {
      snip = GRN_PTR_VALUE(cache);
    }
  }

  if (snip) {
    result = snippet_exec(ctx, snip, args[0], user_data, NULL, 0, NULL, 0);
  }

  if (!result) {
    result = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return result;
}
/*
 * highlight(string, keyword..., [options])
 *
 * args[0] is the text to highlight. When the last argument is a
 * GRN_TABLE_HASH_KEY it is treated as an options table; the recognized
 * keys are "normalizer", "html_escape", "default_open_tag" and
 * "default_close_tag". Any other key is reported as GRN_INVALID_ARGUMENT.
 *
 * When neither default tag has a length, the remaining arguments are
 * passed to highlight_keyword_sets() as (keyword, open_tag, close_tag)
 * triples. Otherwise every remaining argument is a keyword highlighted
 * with the default tags.
 *
 * The keywords table is created for this call only and is unlinked once
 * highlighting is done.
 *
 * Returns the highlighted object, or a GRN_DB_VOID object when there are
 * too few arguments, an error was reported, the keywords table couldn't
 * be created or no highlighted object was produced.
 */
static grn_obj *
func_highlight(grn_ctx *ctx, int nargs, grn_obj **args,
               grn_user_data *user_data)
{
  grn_obj *highlighted = NULL;

#define N_REQUIRED_ARGS 1
  if (nargs > N_REQUIRED_ARGS) {
    grn_obj *string = args[0];
    grn_bool use_html_escape = GRN_FALSE;
    grn_obj *keywords;
    const char *normalizer_name = "NormalizerAuto";
    unsigned int normalizer_name_length = 14;
    const char *default_open_tag = NULL;
    unsigned int default_open_tag_length = 0;
    const char *default_close_tag = NULL;
    unsigned int default_close_tag_length = 0;
    grn_obj *end_arg = args[nargs - 1];
    int n_args_without_option = nargs;

    if (end_arg->header.type == GRN_TABLE_HASH_KEY) {
      grn_obj *options = end_arg;
      grn_hash_cursor *cursor;
      void *key;
      grn_obj *value;
      int key_size;

      n_args_without_option--;
      cursor = grn_hash_cursor_open(ctx, (grn_hash *)options,
                                    NULL, 0, NULL, 0,
                                    0, -1, 0);
      if (!cursor) {
        GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                         "highlight(): couldn't open cursor");
        goto exit;
      }
      while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) {
        grn_hash_cursor_get_key_value(ctx, cursor,
                                      &key, &key_size,
                                      (void **)&value);
        if (key_size == 10 && !memcmp(key, "normalizer", 10)) {
          normalizer_name = GRN_TEXT_VALUE(value);
          normalizer_name_length = GRN_TEXT_LEN(value);
        } else if (key_size == 11 && !memcmp(key, "html_escape", 11)) {
          if (GRN_BOOL_VALUE(value)) {
            use_html_escape = GRN_TRUE;
          }
        } else if (key_size == 16 && !memcmp(key, "default_open_tag", 16)) {
          default_open_tag = GRN_TEXT_VALUE(value);
          default_open_tag_length = GRN_TEXT_LEN(value);
        } else if (key_size == 17 && !memcmp(key, "default_close_tag", 17)) {
          default_close_tag = GRN_TEXT_VALUE(value);
          default_close_tag_length = GRN_TEXT_LEN(value);
        } else {
          GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                           "invalid option name: <%.*s>",
                           key_size, (char *)key);
          grn_hash_cursor_close(ctx, cursor);
          goto exit;
        }
      }
      grn_hash_cursor_close(ctx, cursor);
    }

    keywords =
      func_highlight_create_keywords_table(ctx, user_data,
                                           normalizer_name,
                                           normalizer_name_length);
    if (keywords) {
      grn_obj **keyword_args = args + N_REQUIRED_ARGS;
      unsigned int n_keyword_args = n_args_without_option - N_REQUIRED_ARGS;

      if (default_open_tag_length == 0 && default_close_tag_length == 0) {
        highlighted = highlight_keyword_sets(ctx, user_data,
                                             keyword_args, n_keyword_args,
                                             string, keywords,
                                             use_html_escape);
      } else {
        unsigned int i;

        for (i = 0; i < n_keyword_args; i++) {
          grn_table_add(ctx, keywords,
                        GRN_TEXT_VALUE(keyword_args[i]),
                        GRN_TEXT_LEN(keyword_args[i]),
                        NULL);
        }
        highlighted = highlight_keywords(ctx, user_data,
                                         string, keywords, use_html_escape,
                                         default_open_tag,
                                         default_open_tag_length,
                                         default_close_tag,
                                         default_close_tag_length);
      }

      /* The table was created for this call only; release it the same way
       * highlight_full() does once the highlighted text has been built. */
      grn_obj_unlink(ctx, keywords);
    }
  }
#undef N_REQUIRED_ARGS

exit :
  if (!highlighted) {
    highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return highlighted;
}
static grn_obj * highlight_keyword_sets(grn_ctx *ctx, grn_user_data *user_data, grn_obj **keyword_set_args, unsigned int n_keyword_args, grn_obj *string, grn_obj *keywords, grn_bool use_html_escape) { grn_obj *highlighted = NULL; #define KEYWORD_SET_SIZE 3 { unsigned int i; unsigned int n_keyword_sets; grn_obj open_tags; grn_obj open_tag_lengths; grn_obj close_tags; grn_obj close_tag_lengths; n_keyword_sets = n_keyword_args / KEYWORD_SET_SIZE; GRN_OBJ_INIT(&open_tags, GRN_BULK, 0, GRN_DB_VOID); GRN_OBJ_INIT(&open_tag_lengths, GRN_BULK, 0, GRN_DB_VOID); GRN_OBJ_INIT(&close_tags, GRN_BULK, 0, GRN_DB_VOID); GRN_OBJ_INIT(&close_tag_lengths, GRN_BULK, 0, GRN_DB_VOID); for (i = 0; i < n_keyword_sets; i++) { grn_obj *keyword = keyword_set_args[i * KEYWORD_SET_SIZE + 0]; grn_obj *open_tag = keyword_set_args[i * KEYWORD_SET_SIZE + 1]; grn_obj *close_tag = keyword_set_args[i * KEYWORD_SET_SIZE + 2]; grn_table_add(ctx, keywords, GRN_TEXT_VALUE(keyword), GRN_TEXT_LEN(keyword), NULL); { const char *open_tag_content = GRN_TEXT_VALUE(open_tag); grn_bulk_write(ctx, &open_tags, (const char *)(&open_tag_content), sizeof(char *)); } { unsigned int open_tag_length = GRN_TEXT_LEN(open_tag); grn_bulk_write(ctx, &open_tag_lengths, (const char *)(&open_tag_length), sizeof(unsigned int)); } { const char *close_tag_content = GRN_TEXT_VALUE(close_tag); grn_bulk_write(ctx, &close_tags, (const char *)(&close_tag_content), sizeof(char *)); } { unsigned int close_tag_length = GRN_TEXT_LEN(close_tag); grn_bulk_write(ctx, &close_tag_lengths, (const char *)(&close_tag_length), sizeof(unsigned int)); } } highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_TEXT, 0); grn_pat_tag_keys(ctx, keywords, GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string), (const char **)GRN_BULK_HEAD(&open_tags), (unsigned int *)GRN_BULK_HEAD(&open_tag_lengths), (const char **)GRN_BULK_HEAD(&close_tags), (unsigned int *)GRN_BULK_HEAD(&close_tag_lengths), n_keyword_sets, highlighted, use_html_escape); grn_obj_unlink(ctx, 
&open_tags); grn_obj_unlink(ctx, &open_tag_lengths); grn_obj_unlink(ctx, &close_tags); grn_obj_unlink(ctx, &close_tag_lengths); } #undef KEYWORD_SET_SIZE return highlighted; }
static grn_obj * func_vector_slice(grn_ctx *ctx, int n_args, grn_obj **args, grn_user_data *user_data) { grn_obj *target; grn_obj *from_raw = NULL; grn_obj *length_raw = NULL; int64_t from = 0; int64_t length = -1; uint32_t to = 0; uint32_t size = 0; grn_obj *slice; if (n_args < 2 || n_args > 3) { GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "vector_slice(): wrong number of arguments (%d for 2..3)", n_args); return NULL; } target = args[0]; from_raw = args[1]; if (n_args == 3) { length_raw = args[2]; } switch (target->header.type) { case GRN_VECTOR : case GRN_PVECTOR : case GRN_UVECTOR : size = grn_vector_size(ctx, target); break; default : { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, target, &inspected); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "vector_slice(): target object must be vector: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } break; } if (!grn_type_id_is_number_family(ctx, from_raw->header.domain)) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, from_raw); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "vector_slice(): from must be a number: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } if (from_raw->header.domain == GRN_DB_INT32) { from = GRN_INT32_VALUE(from_raw); } else if (from_raw->header.domain == GRN_DB_INT64) { from = GRN_INT64_VALUE(from_raw); } else { grn_obj buffer; grn_rc rc; GRN_INT64_INIT(&buffer, 0); rc = grn_obj_cast(ctx, from_raw, &buffer, GRN_FALSE); if (rc == GRN_SUCCESS) { from = GRN_INT64_VALUE(&buffer); } GRN_OBJ_FIN(ctx, &buffer); if (rc != GRN_SUCCESS) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, from_raw); GRN_PLUGIN_ERROR(ctx, rc, "vector_slice(): " "failed to cast from value to number: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } } if 
(length_raw) { if (!grn_type_id_is_number_family(ctx, length_raw->header.domain)) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, length_raw); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "vector_slice(): length must be a number: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } if (length_raw->header.domain == GRN_DB_INT32) { length = GRN_INT32_VALUE(length_raw); } else if (length_raw->header.domain == GRN_DB_INT64) { length = GRN_INT64_VALUE(length_raw); } else { grn_obj buffer; grn_rc rc; GRN_INT64_INIT(&buffer, 0); rc = grn_obj_cast(ctx, length_raw, &buffer, GRN_FALSE); if (rc == GRN_SUCCESS) { length = GRN_INT64_VALUE(&buffer); } GRN_OBJ_FIN(ctx, &buffer); if (rc != GRN_SUCCESS) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, length_raw); GRN_PLUGIN_ERROR(ctx, rc, "vector_slice(): " "failed to cast length value to number: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } } } slice = grn_plugin_proc_alloc(ctx, user_data, target->header.domain, GRN_OBJ_VECTOR); if (!slice) { return NULL; } if (target->header.flags & GRN_OBJ_WITH_WEIGHT) { slice->header.flags |= GRN_OBJ_WITH_WEIGHT; } if (length < 0) { length = size + length + 1; } if (length > size) { length = size; } if (length <= 0) { return slice; } while (from < 0) { from += size; } to = from + length; if (to > size) { to = size; } switch (target->header.type) { case GRN_VECTOR : { unsigned int i; for (i = from; i < to; i++) { const char *content; unsigned int content_length; unsigned int weight; grn_id domain; content_length = grn_vector_get_element(ctx, target, i, &content, &weight, &domain); grn_vector_add_element(ctx, slice, content, content_length, weight, domain); } } break; case GRN_PVECTOR : { unsigned int i; for (i = from; i < to; i++) { grn_obj *element = GRN_PTR_VALUE_AT(target, i); GRN_PTR_PUT(ctx, slice, 
element); } } break; case GRN_UVECTOR : { unsigned int i; for (i = from; i < to; i++) { grn_id id; unsigned int weight; id = grn_uvector_get_element(ctx, target, i, &weight); grn_uvector_add_element(ctx, slice, id, weight); } } break; } return slice; }