void test_add_cond_with_copy_tag(void) { const gchar keyword[] = "Groonga"; unsigned int keyword_len; const gchar open_tag[] = "<<"; const gchar close_tag[] = ">>"; unsigned int open_tag_len, close_tag_len; keyword_len = strlen(keyword); open_tag_len = strlen(open_tag); close_tag_len = strlen(close_tag); default_flags = GRN_SNIP_COPY_TAG; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, keyword_len, open_tag, open_tag_len, close_tag, close_tag_len)); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, keyword_len, open_tag, open_tag_len, NULL, 0)); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, keyword_len, NULL, 0, close_tag, close_tag_len)); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, keyword_len, NULL, 0, NULL, 0)); }
void test_html_mapping(void) { const gchar open_tag[] = "<<"; unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword[] = "indexing"; default_mapping = (grn_snip_mapping *)-1; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), open_tag, strlen(open_tag), NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text), &n_results, &max_tagged_len)); cut_assert_equal_uint(1, n_results); cut_assert_equal_uint(113, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("ngine, & combines the best of n-gram\n" "<<indexing]] and word <<indexing]] to achieve fast, " "precise searches. W", result); cut_assert_equal_uint(112, result_len); }
void test_exec_with_one_length_keyword(void) { unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword[] = "x"; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text), &n_results, &max_tagged_len)); cut_assert_equal_uint(2, n_results); cut_assert_equal_uint(113, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("Groonga is an embeddable fullte[[x]]t search " "engine, which you can use in\n" "conjunction with various scrip", result); cut_assert_equal_uint(104, result_len); grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len)); cut_assert_equal_string("an inverted inde[[x]] based engine, & " "combines the best of n-gram\n" "inde[[x]]ing and word inde[[x]]ing to achieve ", result); cut_assert_equal_uint(112, result_len); }
/*
 * Data-driven test: opens a UTF-8 snippet with the flags supplied in `data`
 * ("flags"), searches the shared sample text for "embeddable" and compares
 * the single resulting snippet with the string supplied in `data`
 * ("expected").
 *
 * The result buffer is stored in the file-scope `result` variable, exactly
 * as the other tests in this file do. The previous version declared a local
 * `result` that shadowed it, so the g_new() buffer was never released by
 * this function and the pointer was lost on return.
 */
void
test_proper_tag_insertion(gconstpointer data)
{
  unsigned int n_results;
  unsigned int max_tagged_len;
  const gchar keyword[] = "embeddable";
  const gchar *expected;
  unsigned int text_len, keyword_len, result_len, expected_len;

  default_encoding = GRN_ENC_UTF8;
  default_flags = gcut_data_get_int(data, "flags");

  text_len = strlen(text);
  keyword_len = strlen(keyword);
  expected = gcut_data_get_string(data, "expected");
  expected_len = strlen(expected);

  cut_assert_open_snip();
  grn_test_assert(grn_snip_add_cond(&context, snip, keyword, keyword_len,
                                    NULL, 0, NULL, 0));

  grn_test_assert(grn_snip_exec(&context, snip, text, text_len,
                                &n_results, &max_tagged_len));
  cut_assert_equal_uint(1, n_results);
  /* max_tagged_len is expected to be one byte larger than the snippet. */
  cut_assert_equal_uint(expected_len + 1, max_tagged_len);
  result = g_new(gchar, max_tagged_len);

  grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len));
  cut_assert_equal_string(expected, result);
  cut_assert_equal_uint(expected_len, result_len);
}
void test_exec_composed_decomposed_normalize_utf8(void) { unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar text[] = "Ⅶ¨abcde"; const gchar keyword[] = "ab"; default_encoding = GRN_ENC_UTF8; default_flags = GRN_SNIP_NORMALIZE; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text), &n_results, &max_tagged_len)); cut_assert_equal_uint(1, n_results); cut_assert_equal_uint(15, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("Ⅶ¨[[ab]]cde", result); cut_assert_equal_uint(14, result_len); }
void test_simple_exec_utf8(void) { unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword[] = "エンジン"; default_encoding = GRN_ENC_UTF8; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text_ja_utf8, strlen(text_ja_utf8), &n_results, &max_tagged_len)); cut_assert_equal_uint(2, n_results); cut_assert_equal_uint(105, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("Groongaは組み込み型の全文検索[[エンジン]]です。" "DBMSやスクリプト言語処理系", result); cut_assert_equal_uint(102, result_len); grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len)); cut_assert_equal_string("度な転置\n" "インデックスタイプの[[エンジン]]です。" "コンパクトな実装ですが、", result); cut_assert_equal_uint(104, result_len); }
void test_simple_exec(void) { unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword[] = "Groonga"; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text), &n_results, &max_tagged_len)); cut_assert_equal_uint(2, n_results); cut_assert_equal_uint(105, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("[[Groonga]] is an embeddable fulltext search engine, " "which you can use in\n" "conjunction with various scrip", result); cut_assert_equal_uint(104, result_len); grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len)); cut_assert_equal_string("ting languages and databases. [[Groonga]] is\n" "an inverted index based engine, & combines " "the best of n-gr", result); cut_assert_equal_uint(104, result_len); }
void test_html_mapping_escape(void) { const gchar close_tag[] = ">&>"; unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword[] = "Ruby"; const gchar expected[] = "y not required.)</li>\n" " <li>[[Ruby>&> 1.8.1 or later " /* */"(for [[Ruby>&> binding.)" /* */"<a class="external" " /* */"href="; default_mapping = (grn_snip_mapping *)-1; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, close_tag, strlen(close_tag))); grn_test_assert(grn_snip_exec(&context, snip, html_text, strlen(html_text), &n_results, &max_tagged_len)); cut_assert_equal_uint(1, n_results); cut_assert_equal_uint(strlen(expected) + 1, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string(expected, result); cut_assert_equal_uint(strlen(expected), result_len); }
void test_exec_with_normalize(void) { unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword[] = "転置インデックス"; default_encoding = GRN_ENC_UTF8; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text_ja_utf8, strlen(text_ja_utf8), &n_results, &max_tagged_len)); cut_assert_equal_uint(0, n_results); grn_obj_close(&context, (grn_obj *)snip); snip = NULL; default_flags = GRN_SNIP_NORMALIZE; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text_ja_utf8, strlen(text_ja_utf8), &n_results, &max_tagged_len)); cut_assert_equal_uint(1, n_results); cut_assert_equal_uint(105, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("備えた、高速かつ高精度な[[転置\n" "インデックス]]タイプのエンジンです。コン", result); cut_assert_equal_uint(104, result_len); }
void test_multi_conditions(void) { const gchar open_tag[] = "((*"; const gchar close_tag[] = "*))"; unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword1[] = "fulltext"; const gchar keyword2[] = "groonga"; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword1, strlen(keyword1), open_tag, strlen(open_tag), close_tag, strlen(close_tag))); grn_test_assert(grn_snip_add_cond(&context, snip, keyword2, strlen(keyword2), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text), &n_results, &max_tagged_len)); cut_assert_equal_uint(2, n_results); cut_assert_equal_uint(107, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("Groonga is an embeddable ((*fulltext*)) search " "engine, which you can use in\n" "conjunction with various scrip", result); cut_assert_equal_uint(106, result_len); grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len)); cut_assert_equal_string("exing to achieve fast, precise searches. While\n" "[[groonga]] codebase is rather compact it is " "scalable eno", result); cut_assert_equal_uint(104, result_len); }
/*
 * Snippet function taking positional arguments. Nothing is done unless more
 * than 10 arguments are given.
 *
 * Arguments as read by this function:
 *   args[0]   text to take snippets from
 *   args[1]   snippet width (read with GRN_UINT64_VALUE)
 *   args[2]   maximum number of results (read with GRN_UINT64_VALUE)
 *   args[3]   normalizer name; when non-empty it is looked up with
 *             grn_ctx_get() and set on the snippet object
 *   args[4]   non-zero adds GRN_SNIP_SKIP_LEADING_SPACES
 *   args[5]   non-zero selects GRN_SNIP_MAPPING_HTML_ESCAPE
 *   args[6], args[7]
 *             not read directly here (args is also handed to snippet_exec)
 *   args[8..] (keyword, open tag, close tag) triples
 *
 * Returns the object produced by snippet_exec(), or a GRN_DB_VOID object
 * allocated with grn_plugin_proc_alloc() when no snippets were produced.
 */
static grn_obj *
func_snippet_tritonn(grn_ctx *ctx, int nargs, grn_obj **args,
                     grn_user_data *user_data)
{
  grn_obj *snippets = NULL;

  if (nargs > 10) {
    grn_obj *text = args[0];
    grn_obj *snip = NULL;
    unsigned int width = GRN_UINT64_VALUE(args[1]);
    unsigned int max_n_results = GRN_UINT64_VALUE(args[2]);
    grn_snip_mapping *mapping = NULL;
    int flags = GRN_SNIP_COPY_TAG;

    if (GRN_UINT64_VALUE(args[4])) {
      flags |= GRN_SNIP_SKIP_LEADING_SPACES;
    }
    if (GRN_UINT64_VALUE(args[5])) {
      mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;
    }

    snip = grn_snip_open(ctx, flags, width, max_n_results,
                         "", 0, "", 0, mapping);
    if (snip) {
      unsigned int i;

      if (GRN_TEXT_LEN(args[3])) {
        grn_obj *normalizer;
        normalizer = grn_ctx_get(ctx,
                                 GRN_TEXT_VALUE(args[3]),
                                 GRN_TEXT_LEN(args[3]));
        grn_snip_set_normalizer(ctx, snip, normalizer);
      }

      /*
       * Each condition consumes three arguments, so a triple is only used
       * when all three of them exist. The previous bound (i < nargs) read
       * args[i + 1] and args[i + 2] past the end of args whenever the
       * trailing arguments did not form a complete triple. An incomplete
       * trailing triple is now ignored.
       *
       * The return code of grn_snip_add_cond() is not inspected, as before.
       */
      for (i = 8; i + 2 < (unsigned int)nargs; i += 3) {
        grn_snip_add_cond(ctx, snip,
                          GRN_TEXT_VALUE(args[i]),
                          GRN_TEXT_LEN(args[i]),
                          GRN_TEXT_VALUE(args[i + 1]),
                          GRN_TEXT_LEN(args[i + 1]),
                          GRN_TEXT_VALUE(args[i + 2]),
                          GRN_TEXT_LEN(args[i + 2]));
      }

      snippets = snippet_exec(ctx, snip, text, user_data, args);
      grn_obj_close(ctx, snip);
    }
  }

  if (!snippets) {
    snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return snippets;
}
void test_add_cond_with_too_large_keyword(void) { const gchar *sub_text; cut_assert_open_snip(); cut_assert_operator_int(strlen(text), >, default_width); grn_test_assert_equal_rc(GRN_INVALID_ARGUMENT, grn_snip_add_cond(&context, snip, text, strlen(text), NULL, 0, NULL, 0)); sub_text = text + strlen(text) - default_width; grn_test_assert(grn_snip_add_cond(&context, snip, sub_text, strlen(sub_text), NULL, 0, NULL, 0)); sub_text--; grn_test_assert_equal_rc(GRN_INVALID_ARGUMENT, grn_snip_add_cond(&context, snip, sub_text, strlen(sub_text), NULL, 0, NULL, 0)); }
void test_add_cond_with_invalid_argument(void) { unsigned int n_conds = 0, max_n_conds = 32U; const gchar keyword[] = "Groonga"; unsigned int keyword_len; const gchar open_tag[] = "<<"; const gchar close_tag[] = ">>"; unsigned int open_tag_len, close_tag_len; keyword_len = strlen(keyword); open_tag_len = strlen(open_tag); close_tag_len = strlen(close_tag); cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, keyword_len, open_tag, open_tag_len, close_tag, close_tag_len)); n_conds++; grn_test_assert_equal_rc(GRN_INVALID_ARGUMENT, grn_snip_add_cond(&context, NULL, keyword, keyword_len, open_tag, open_tag_len, close_tag, close_tag_len)); grn_test_assert_equal_rc(GRN_INVALID_ARGUMENT, grn_snip_add_cond(&context, snip, NULL, keyword_len, open_tag, open_tag_len, close_tag, close_tag_len)); grn_test_assert_equal_rc(GRN_INVALID_ARGUMENT, grn_snip_add_cond(&context, snip, keyword, 0, open_tag, open_tag_len, close_tag, close_tag_len)); while (n_conds < max_n_conds) { grn_test_assert(grn_snip_add_cond(&context, snip, keyword, keyword_len, open_tag, open_tag_len, close_tag, close_tag_len), cut_message("cond #%d", n_conds)); n_conds++; } grn_test_assert_equal_rc(GRN_INVALID_ARGUMENT, grn_snip_add_cond(&context, snip, keyword, keyword_len, open_tag, open_tag_len, close_tag, close_tag_len), cut_message("cond #%d", n_conds)); }
/* TODO: delete overlapping logic with exec_query */
/*
 * Walks the query cell list `c` and registers string cells as conditions of
 * `snip`.
 *
 * A string cell is added only when `c_but` and "the operator currently in
 * effect is GRN_OP_BUT" agree (both set or both clear). The open/close tag
 * pair used for a condition is chosen by snip->cond_len % n_tags, so n_tags
 * must be non-zero. Nested lists are handled recursively; the c_but flag is
 * flipped for a list that is reached through GRN_OP_BUT.
 *
 * The `op` parameter is not read by this function.
 *
 * Returns GRN_SUCCESS, or the first error reported by grn_snip_add_cond().
 * Errors from nested lists are propagated as well; previously the result of
 * the recursive call was discarded, so a failure inside a nested list went
 * unnoticed while the same failure at the top level aborted the walk.
 */
static grn_rc
snip_query(grn_ctx *ctx, grn_query *q, grn_snip *snip, grn_cell *c,
           grn_operator op, unsigned int n_tags, int c_but,
           const char **opentags, unsigned int *opentag_lens,
           const char **closetags, unsigned int *closetag_lens)
{
  grn_cell *e, *ope = NIL;
  /* The first element is evaluated with GRN_OP_OR; later ones with op1. */
  grn_operator op0 = GRN_OP_OR, *opp = &op0, op1 = q->default_op;

  while (c != NIL) {
    POP(e, c);
    switch (e->header.type) {
    case GRN_CELL_OP :
      /* Remember the operator for the element that follows it. */
      ope = e;
      op1 = ope->u.op.op;
      continue;
    case GRN_CELL_STR :
      if (ope != NIL) {
        q->opt.mode = ope->u.op.mode == -1 ? q->default_mode : ope->u.op.mode;
      } else {
        q->opt.mode = q->default_mode;
      }
      if (!(c_but ^ (*opp == GRN_OP_BUT))) {
        grn_rc rc;
        unsigned int i = snip->cond_len % n_tags;
        if ((rc = grn_snip_add_cond(ctx, snip, e->u.b.value, e->u.b.size,
                                    opentags[i], opentag_lens[i],
                                    closetags[i], closetag_lens[i]))) {
          return rc;
        }
      }
      break;
    case GRN_CELL_LIST :
      {
        grn_rc rc = snip_query(ctx, q, snip, e, *opp, n_tags,
                               (*opp == GRN_OP_BUT) ? c_but ^ 1 : c_but,
                               opentags, opentag_lens,
                               closetags, closetag_lens);
        if (rc) {
          return rc;
        }
      }
      break;
    default :
      GRN_LOG(ctx, GRN_LOG_NOTICE,
              "invalid object assigned in query!! (%d)", e->header.type);
      break;
    }
    /* Reset the per-element operator state for the next element. */
    opp = &op1;
    ope = NIL;
    op1 = q->default_op;
  }
  return GRN_SUCCESS;
}
/*
 * call-seq:
 *   snippet.add_keyword(keyword, options={})
 *
 * Adds _keyword_. The following values can be given in _options_.
 *
 * [+:open_tag+]
 *   Opening tag. When omitted, the +:default_open_tag+ given to
 *   Groonga::Snippet.new is used.
 *
 * [+:close_tag+]
 *   Closing tag. When omitted, the +:default_close_tag+ given to
 *   Groonga::Snippet.new is used.
 */
static VALUE
rb_grn_snippet_add_keyword (int argc, VALUE *argv, VALUE self)
{
    RbGrnSnippet *snippet_data;
    VALUE rb_keyword, options;
    VALUE rb_open_tag, rb_close_tag;
    char *keyword;
    unsigned int keyword_length;
    char *open_tag = NULL;
    unsigned int open_tag_length = 0;
    char *close_tag = NULL;
    unsigned int close_tag_length = 0;
    grn_rc rc;

    /* One required argument (keyword) and one optional (options). */
    rb_scan_args(argc, argv, "11", &rb_keyword, &options);

    snippet_data = SELF(self);

    keyword = StringValuePtr(rb_keyword);
    keyword_length = RSTRING_LEN(rb_keyword);

    rb_grn_scan_options(options,
                        "open_tag", &rb_open_tag,
                        "close_tag", &rb_close_tag,
                        NULL);

    /* A nil option leaves the tag as NULL with length 0. */
    if (!NIL_P(rb_open_tag)) {
        open_tag = StringValuePtr(rb_open_tag);
        open_tag_length = RSTRING_LEN(rb_open_tag);
    }
    if (!NIL_P(rb_close_tag)) {
        close_tag = StringValuePtr(rb_close_tag);
        close_tag_length = RSTRING_LEN(rb_close_tag);
    }

    rc = grn_snip_add_cond(snippet_data->context,
                           snippet_data->snippet,
                           keyword, keyword_length,
                           open_tag, open_tag_length,
                           close_tag, close_tag_length);
    rb_grn_rc_check(rc, self);

    return Qnil;
}
void test_simple_exec_euc_jp(void) { GError *error = NULL; unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; keyword = utf8_to_euc_jp("検索", &error); cut_assert_g_error(error); default_encoding = GRN_ENC_EUC_JP; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text_ja_euc, strlen(text_ja_euc), &n_results, &max_tagged_len)); cut_assert_equal_uint(2, n_results); cut_assert_equal_uint(108, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("型の全文[[検索]]エンジンです。" "DBMSやスクリプト言語処理系等に\n" "組み込むことによって、その全文[[検索]]機能を強", take_euc_jp_to_utf8(result)); cut_assert_equal_uint(107, result_len); grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len)); cut_assert_equal_string("です。コンパクトな実装ですが、大規模な文書\n" "量と[[検索]]要求を処理できるように設計されて" "います。また、純", take_euc_jp_to_utf8(result)); cut_assert_equal_uint(103, result_len); }
void test_simple_exec_sjis(void) { GError *error = NULL; unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; keyword = utf8_to_sjis("処理", &error); cut_assert_g_error(error); default_encoding = GRN_ENC_SJIS; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text_ja_sjis, strlen(text_ja_sjis), &n_results, &max_tagged_len)); cut_assert_equal_uint(2, n_results); cut_assert_equal_uint(104, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("み型の全文検索エンジンです。" "DBMSやスクリプト言語[[処理]]系等に\n" "組み込むことによって、その全文検索機能を", take_sjis_to_utf8(result)); cut_assert_equal_uint(103, result_len); grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len)); cut_assert_equal_string("パクトな実装ですが、大規模な文書\n" "量と検索要求を[[処理]]できるように設計" "されています。また、純粋なn-gram", take_sjis_to_utf8(result)); cut_assert_equal_uint(103, result_len); }
void test_invalid_result_index(void) { unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword[] = "index"; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text), &n_results, &max_tagged_len)); cut_assert_equal_uint(1, n_results); cut_assert_equal_uint(113, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); grn_test_assert_equal_rc(GRN_INVALID_ARGUMENT, grn_snip_get_result(&context, snip, 1, result, &result_len)); }
void test_customized_tag(void) { const gchar open_tag[] = "((*"; const gchar close_tag[] = "*))"; unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword[] = "engine"; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), open_tag, strlen(open_tag), close_tag, strlen(close_tag))); grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text), &n_results, &max_tagged_len)); cut_assert_equal_uint(2, n_results); cut_assert_equal_uint(107, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("Groonga is an embeddable fulltext search " "((*engine*)), which you can use in\n" "conjunction with various scrip", result); cut_assert_equal_uint(106, result_len); grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len)); cut_assert_equal_string(" databases. Groonga is\n" "an inverted index based ((*engine*)), " "& combines the best of n-gram\n" "indexing and wo", result); cut_assert_equal_uint(106, result_len); }
/*
 * Builds a grn_snip object from UDF arguments.
 *
 * Arguments as read by this function:
 *   args[1]   maximum snippet length (long long)
 *   args[2]   maximum number of snippets (long long)
 *   args[3]   charset, by name (STRING_RESULT) or by ID (otherwise)
 *   args[4]   non-zero adds GRN_SNIP_SKIP_LEADING_SPACES
 *   args[5]   non-zero selects the mapping (grn_snip_mapping *) -1
 *   args[8..] (keyword, open tag, close tag) triples
 *
 * GRN_SNIP_NORMALIZE is added unless the charset state has MY_CS_BINSORT or
 * MY_CS_CSSORT set. On success the charset is also set on
 * snip_info->result_str.
 *
 * Returns FALSE on success. On failure an error text is written to
 * `message`, an already opened snippet object is closed, and TRUE is
 * returned.
 */
static my_bool mrn_snippet_prepare(st_mrn_snip_info *snip_info, UDF_ARGS *args,
                                   char *message, grn_snip **snippet)
{
  grn_ctx *ctx = &snip_info->ctx;
  String *result_str = &snip_info->result_str;
  CHARSET_INFO *cs;
  grn_snip_mapping *mapping = NULL;
  int flags = GRN_SNIP_COPY_TAG;
  long long max_length;
  long long max_count;
  unsigned int cond_index;

  *snippet = NULL;
  max_length = *((long long *) args->args[1]);
  max_count = *((long long *) args->args[2]);

  /* Resolve the charset either by name or by numeric ID. */
  if (args->arg_type[3] == STRING_RESULT) {
    cs = get_charset_by_name(args->args[3], MYF(0));
    if (!cs) {
      snprintf(message, MYSQL_ERRMSG_SIZE,
               "Unknown charset: <%s>", args->args[3]);
      goto error;
    }
  } else {
    uint charset_id = static_cast<uint>(*((long long *) args->args[3]));
    cs = get_charset(charset_id, MYF(0));
    if (!cs) {
      snprintf(message, MYSQL_ERRMSG_SIZE,
               "Unknown charset ID: <%u>", charset_id);
      goto error;
    }
  }

  if (!mrn::encoding::set(ctx, cs)) {
    snprintf(message, MYSQL_ERRMSG_SIZE,
             "Unsupported charset: <%s>", cs->name);
    goto error;
  }

  if ((cs->state & (MY_CS_BINSORT | MY_CS_CSSORT)) == 0) {
    flags |= GRN_SNIP_NORMALIZE;
  }
  if (*((long long *) args->args[4])) {
    flags |= GRN_SNIP_SKIP_LEADING_SPACES;
  }
  if (*((long long *) args->args[5])) {
    mapping = (grn_snip_mapping *) -1;
  }

  *snippet = grn_snip_open(ctx, flags,
                           static_cast<unsigned int>(max_length),
                           static_cast<unsigned int>(max_count),
                           "", 0, "", 0, mapping);
  if (ctx->rc) {
    snprintf(message, MYSQL_ERRMSG_SIZE,
             "Failed to open grn_snip: <%s>", ctx->errbuf);
    goto error;
  }

  /* Conditions start at args[8] and come in groups of three. */
  for (cond_index = 8; cond_index < args->arg_count; cond_index += 3) {
    grn_rc rc = grn_snip_add_cond(ctx, *snippet,
                                  args->args[cond_index],
                                  args->lengths[cond_index],
                                  args->args[cond_index + 1],
                                  args->lengths[cond_index + 1],
                                  args->args[cond_index + 2],
                                  args->lengths[cond_index + 2]);
    if (rc) {
      snprintf(message, MYSQL_ERRMSG_SIZE,
               "Failed to add a condition to grn_snip: <%s>", ctx->errbuf);
      goto error;
    }
  }

  result_str->set_charset(cs);
  return FALSE;

error:
  if (*snippet) {
    grn_snip_close(ctx, *snippet);
  }
  return TRUE;
}
/* TODO: support caching for the same parameter. */
/*
 * snippet(text, ...) function.
 *
 * args[0] is the text. If the last argument is a GRN_TABLE_HASH_KEY object
 * it is treated as an options table; the recognized keys are "width",
 * "max_n_results", "skip_leading_spaces", "html_escape", "prefix",
 * "suffix", "normalizer", "default_open_tag" and "default_close_tag".
 * Any other key raises GRN_INVALID_ARGUMENT.
 *
 * The remaining arguments are interpreted in one of two ways:
 *   - when neither default tag has a non-zero length, as
 *     (keyword, open tag, close tag) triples;
 *   - otherwise, as plain keywords that use the default tags.
 *
 * Normalizer handling: when no "normalizer" option is given,
 * GRN_NORMALIZER_AUTO is set; when a non-empty name is given, the named
 * object is looked up and must be a normalizer proc; when an empty name is
 * given, no normalizer is set.
 *
 * Returns the object produced by snippet_exec(), or a GRN_DB_VOID object
 * allocated with grn_plugin_proc_alloc() when no snippets were produced.
 */
static grn_obj *
func_snippet(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *snippets = NULL;

#define N_REQUIRED_ARGS 1
#define KEYWORD_SET_SIZE 3
  if (nargs > N_REQUIRED_ARGS) {
    grn_obj *text = args[0];
    grn_obj *end_arg = args[nargs - 1];
    grn_obj *snip = NULL;
    unsigned int width = 200;
    unsigned int max_n_results = 3;
    grn_snip_mapping *mapping = NULL;
    int flags = GRN_SNIP_SKIP_LEADING_SPACES;
    const char *prefix = NULL;
    int prefix_length = 0;
    const char *suffix = NULL;
    int suffix_length = 0;
    const char *normalizer_name = NULL;
    int normalizer_name_length = 0;
    const char *default_open_tag = NULL;
    int default_open_tag_length = 0;
    const char *default_close_tag = NULL;
    int default_close_tag_length = 0;
    int n_args_without_option = nargs;

    if (end_arg->header.type == GRN_TABLE_HASH_KEY) {
      grn_obj *options = end_arg;
      grn_hash_cursor *cursor;
      void *key;
      int key_size;
      grn_obj *value;

      /* The options table is not a keyword argument. */
      n_args_without_option--;
      cursor = grn_hash_cursor_open(ctx, (grn_hash *)options,
                                    NULL, 0, NULL, 0,
                                    0, -1, 0);
      if (!cursor) {
        GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                         "snippet(): couldn't open cursor");
        goto exit;
      }
      while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) {
        grn_hash_cursor_get_key_value(ctx, cursor, &key, &key_size,
                                      (void **)&value);
        if (key_size == 5 && !memcmp(key, "width", 5)) {
          width = GRN_UINT32_VALUE(value);
        } else if (key_size == 13 && !memcmp(key, "max_n_results", 13)) {
          max_n_results = GRN_UINT32_VALUE(value);
        } else if (key_size == 19 && !memcmp(key, "skip_leading_spaces", 19)) {
          if (GRN_BOOL_VALUE(value) == GRN_FALSE) {
            flags &= ~GRN_SNIP_SKIP_LEADING_SPACES;
          }
        } else if (key_size == 11 && !memcmp(key, "html_escape", 11)) {
          if (GRN_BOOL_VALUE(value)) {
            mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;
          }
        } else if (key_size == 6 && !memcmp(key, "prefix", 6)) {
          prefix = GRN_TEXT_VALUE(value);
          prefix_length = GRN_TEXT_LEN(value);
        } else if (key_size == 6 && !memcmp(key, "suffix", 6)) {
          suffix = GRN_TEXT_VALUE(value);
          suffix_length = GRN_TEXT_LEN(value);
        } else if (key_size == 10 && !memcmp(key, "normalizer", 10)) {
          normalizer_name = GRN_TEXT_VALUE(value);
          normalizer_name_length = GRN_TEXT_LEN(value);
        } else if (key_size == 16 && !memcmp(key, "default_open_tag", 16)) {
          default_open_tag = GRN_TEXT_VALUE(value);
          default_open_tag_length = GRN_TEXT_LEN(value);
        } else if (key_size == 17 && !memcmp(key, "default_close_tag", 17)) {
          default_close_tag = GRN_TEXT_VALUE(value);
          default_close_tag_length = GRN_TEXT_LEN(value);
        } else {
          GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                           "invalid option name: <%.*s>",
                           key_size, (char *)key);
          grn_hash_cursor_close(ctx, cursor);
          goto exit;
        }
      }
      grn_hash_cursor_close(ctx, cursor);
    }

    snip = grn_snip_open(ctx, flags, width, max_n_results,
                         default_open_tag, default_open_tag_length,
                         default_close_tag, default_close_tag_length,
                         mapping);
    if (snip) {
      unsigned int i;

      if (!normalizer_name) {
        grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO);
      } else if (normalizer_name_length > 0) {
        grn_obj *normalizer;
        normalizer = grn_ctx_get(ctx, normalizer_name, normalizer_name_length);
        if (!grn_obj_is_normalizer_proc(ctx, normalizer)) {
          grn_obj inspected;
          GRN_TEXT_INIT(&inspected, 0);
          grn_inspect(ctx, &inspected, normalizer);
          GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                           "snippet(): not normalizer: <%.*s>",
                           (int)GRN_TEXT_LEN(&inspected),
                           GRN_TEXT_VALUE(&inspected));
          GRN_OBJ_FIN(ctx, &inspected);
          grn_obj_unlink(ctx, normalizer);
          /*
           * The snippet object was already opened above and is not handed
           * to anybody on this path, so release it before bailing out
           * (previously it was leaked here).
           */
          grn_obj_close(ctx, snip);
          goto exit;
        }
        grn_snip_set_normalizer(ctx, snip, normalizer);
        grn_obj_unlink(ctx, normalizer);
      }

      /* The return code of grn_snip_add_cond() is not inspected, as before. */
      if (default_open_tag_length == 0 && default_close_tag_length == 0) {
        /* No default tags: arguments are (keyword, open, close) triples. */
        unsigned int n_keyword_sets =
          (n_args_without_option - N_REQUIRED_ARGS) / KEYWORD_SET_SIZE;
        grn_obj **keyword_set_args = args + N_REQUIRED_ARGS;
        for (i = 0; i < n_keyword_sets; i++) {
          grn_snip_add_cond(ctx, snip,
                            GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE]),
                            GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE]),
                            GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE + 1]),
                            GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE + 1]),
                            GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE + 2]),
                            GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE + 2]));
        }
      } else {
        /* Default tags given: every remaining argument is a keyword. */
        unsigned int n_keywords = n_args_without_option - N_REQUIRED_ARGS;
        grn_obj **keyword_args = args + N_REQUIRED_ARGS;
        for (i = 0; i < n_keywords; i++) {
          grn_snip_add_cond(ctx, snip,
                            GRN_TEXT_VALUE(keyword_args[i]),
                            GRN_TEXT_LEN(keyword_args[i]),
                            NULL, 0,
                            NULL, 0);
        }
      }

      /*
       * NOTE(review): snip is not closed in this function after
       * snippet_exec(); confirm who owns it on the success path.
       */
      snippets = snippet_exec(ctx, snip, text, user_data,
                              prefix, prefix_length,
                              suffix, suffix_length);
    }
  }
#undef KEYWORD_SET_SIZE
#undef N_REQUIRED_ARGS

exit :
  if (!snippets) {
    snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return snippets;
}
/*
 * Builds the snippet object used by mroonga_snippet_html().
 *
 * The snippet is opened with width 200, at most 3 results, the
 * <span class="keyword">...</span> tag pair, GRN_SNIP_SKIP_LEADING_SPACES
 * and the HTML-escape mapping. GRN_SNIP_NORMALIZE is added unless
 * system_charset_info has MY_CS_BINSORT or MY_CS_CSSORT set.
 *
 * Conditions come from one of two sources:
 *   - query mode (info->query_mode.used): args[1] is parsed as a query
 *     against a lazily created hash table keyed by short text, and the
 *     conditions of the resulting expression are added to the snippet;
 *   - otherwise: every non-NULL argument from args[1] on is added as a
 *     keyword with the default tags.
 *
 * Returns false on success. On failure, an error text is written to
 * `message` when it is non-NULL, the expression and the snippet object are
 * closed if they exist, and true is returned.
 */
static mrn_bool mrn_snippet_html_prepare(mrn_snippet_html_info *info,
                                         UDF_ARGS *args,
                                         char *message,
                                         grn_obj **snippet)
{
  MRN_DBUG_ENTER_FUNCTION();

  grn_ctx *ctx = info->ctx;
  int flags = GRN_SNIP_SKIP_LEADING_SPACES;
  unsigned int width = 200;
  unsigned int max_n_results = 3;
  const char *open_tag = "<span class=\"keyword\">";
  const char *close_tag = "</span>";
  grn_snip_mapping *mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;
  grn_obj *expr = NULL;

  *snippet = NULL;

  mrn::encoding::set_raw(ctx, system_charset_info);
  if ((system_charset_info->state & (MY_CS_BINSORT | MY_CS_CSSORT)) == 0) {
    flags |= GRN_SNIP_NORMALIZE;
  }

  *snippet = grn_snip_open(ctx, flags,
                           width, max_n_results,
                           open_tag, strlen(open_tag),
                           close_tag, strlen(close_tag),
                           mapping);
  if (ctx->rc != GRN_SUCCESS) {
    if (message) {
      snprintf(message, MYSQL_ERRMSG_SIZE,
               "mroonga_snippet_html(): failed to open grn_snip: <%s>",
               ctx->errbuf);
    }
    goto error;
  }

  if (info->query_mode.used) {
    /* Create the helper table and its key column on first use. */
    if (!info->query_mode.table) {
      grn_obj *short_text;
      short_text = grn_ctx_at(ctx, GRN_DB_SHORT_TEXT);
      info->query_mode.table = grn_table_create(ctx,
                                                NULL, 0,
                                                NULL,
                                                GRN_TABLE_HASH_KEY,
                                                short_text,
                                                NULL);
    }
    if (!info->query_mode.default_column) {
      info->query_mode.default_column =
        grn_obj_column(ctx,
                       info->query_mode.table,
                       GRN_COLUMN_NAME_KEY,
                       GRN_COLUMN_NAME_KEY_LEN);
    }

    grn_obj *record = NULL;
    GRN_EXPR_CREATE_FOR_QUERY(ctx, info->query_mode.table, expr, record);
    if (!expr) {
      if (message) {
        snprintf(message, MYSQL_ERRMSG_SIZE,
                 "mroonga_snippet_html(): "
                 "failed to create expression: <%s>",
                 ctx->errbuf);
      }
      goto error;
    }

    mrn::QueryParser query_parser(ctx,
                                  current_thd,
                                  expr,
                                  info->query_mode.default_column,
                                  0,
                                  NULL);
    grn_rc rc = query_parser.parse(args->args[1], args->lengths[1]);
    if (rc != GRN_SUCCESS) {
      if (message) {
        snprintf(message, MYSQL_ERRMSG_SIZE,
                 "mroonga_snippet_html(): "
                 "failed to parse query: <%s>",
                 ctx->errbuf);
      }
      goto error;
    }

    rc = grn_expr_snip_add_conditions(ctx,
                                      expr,
                                      *snippet,
                                      0,
                                      NULL, NULL,
                                      NULL, NULL);
    if (rc != GRN_SUCCESS) {
      if (message) {
        snprintf(message, MYSQL_ERRMSG_SIZE,
                 "mroonga_snippet_html(): "
                 "failed to add conditions: <%s>",
                 ctx->errbuf);
      }
      goto error;
    }
  } else {
    /* Keyword mode: args[1..] are keywords; NULL arguments are skipped. */
    for (unsigned int i = 1; i < args->arg_count; ++i) {
      if (!args->args[i]) {
        continue;
      }
      grn_rc rc = grn_snip_add_cond(ctx,
                                    *snippet,
                                    args->args[i], args->lengths[i],
                                    NULL, 0,
                                    NULL, 0);
      if (rc != GRN_SUCCESS) {
        if (message) {
          snprintf(message, MYSQL_ERRMSG_SIZE,
                   "mroonga_snippet_html(): "
                   "failed to add a condition to grn_snip: <%s>",
                   ctx->errbuf);
        }
        goto error;
      }
    }
  }

  DBUG_RETURN(false);

error:
  if (expr) {
    grn_obj_close(ctx, expr);
  }
  if (*snippet) {
    grn_obj_close(ctx, *snippet);
  }
  DBUG_RETURN(true);
}