void test_simple_exec_utf8(void) { unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword[] = "エンジン"; default_encoding = GRN_ENC_UTF8; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text_ja_utf8, strlen(text_ja_utf8), &n_results, &max_tagged_len)); cut_assert_equal_uint(2, n_results); cut_assert_equal_uint(105, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("Groongaは組み込み型の全文検索[[エンジン]]です。" "DBMSやスクリプト言語処理系", result); cut_assert_equal_uint(102, result_len); grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len)); cut_assert_equal_string("度な転置\n" "インデックスタイプの[[エンジン]]です。" "コンパクトな実装ですが、", result); cut_assert_equal_uint(104, result_len); }
void test_simple_exec(void) { unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword[] = "Groonga"; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text), &n_results, &max_tagged_len)); cut_assert_equal_uint(2, n_results); cut_assert_equal_uint(105, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("[[Groonga]] is an embeddable fulltext search engine, " "which you can use in\n" "conjunction with various scrip", result); cut_assert_equal_uint(104, result_len); grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len)); cut_assert_equal_string("ting languages and databases. [[Groonga]] is\n" "an inverted index based engine, & combines " "the best of n-gr", result); cut_assert_equal_uint(104, result_len); }
void test_exec_with_one_length_keyword(void) { unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword[] = "x"; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text), &n_results, &max_tagged_len)); cut_assert_equal_uint(2, n_results); cut_assert_equal_uint(113, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("Groonga is an embeddable fullte[[x]]t search " "engine, which you can use in\n" "conjunction with various scrip", result); cut_assert_equal_uint(104, result_len); grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len)); cut_assert_equal_string("an inverted inde[[x]] based engine, & " "combines the best of n-gram\n" "inde[[x]]ing and word inde[[x]]ing to achieve ", result); cut_assert_equal_uint(112, result_len); }
void test_html_mapping(void) { const gchar open_tag[] = "<<"; unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword[] = "indexing"; default_mapping = (grn_snip_mapping *)-1; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), open_tag, strlen(open_tag), NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text), &n_results, &max_tagged_len)); cut_assert_equal_uint(1, n_results); cut_assert_equal_uint(113, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("ngine, & combines the best of n-gram\n" "<<indexing]] and word <<indexing]] to achieve fast, " "precise searches. W", result); cut_assert_equal_uint(112, result_len); }
/*
 * Data-driven test: for each data set, opens a snippet object with the
 * "flags" value from the test data and checks that the single snippet
 * produced for the keyword "embeddable" equals the "expected" string.
 *
 * The result buffer is stored in the file-scope `result` used by the other
 * tests in this file instead of a shadowing local, so that the buffer is not
 * orphaned when the function returns or an assertion aborts it.
 * NOTE(review): this relies on the fixture teardown releasing `result`;
 * confirm against teardown.
 */
void
test_proper_tag_insertion(gconstpointer data)
{
  unsigned int n_results;
  unsigned int max_tagged_len;
  const gchar keyword[] = "embeddable";
  const gchar *expected;
  unsigned int text_len, keyword_len, result_len, expected_len;

  default_encoding = GRN_ENC_UTF8;
  default_flags = gcut_data_get_int(data, "flags");

  text_len = strlen(text);
  keyword_len = strlen(keyword);
  expected = gcut_data_get_string(data, "expected");
  expected_len = strlen(expected);

  cut_assert_open_snip();
  grn_test_assert(grn_snip_add_cond(&context, snip, keyword, keyword_len,
                                    NULL, 0, NULL, 0));

  grn_test_assert(grn_snip_exec(&context, snip, text, text_len,
                                &n_results, &max_tagged_len));
  cut_assert_equal_uint(1, n_results);
  /* The reported maximum is one byte larger than the snippet itself. */
  cut_assert_equal_uint(expected_len + 1, max_tagged_len);

  result = g_new(gchar, max_tagged_len);

  grn_test_assert(grn_snip_get_result(&context, snip, 0,
                                      result, &result_len));
  cut_assert_equal_string(expected, result);
  cut_assert_equal_uint(expected_len, result_len);
}
void test_exec_composed_decomposed_normalize_utf8(void) { unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar text[] = "Ⅶ¨abcde"; const gchar keyword[] = "ab"; default_encoding = GRN_ENC_UTF8; default_flags = GRN_SNIP_NORMALIZE; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text), &n_results, &max_tagged_len)); cut_assert_equal_uint(1, n_results); cut_assert_equal_uint(15, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("Ⅶ¨[[ab]]cde", result); cut_assert_equal_uint(14, result_len); }
void test_html_mapping_escape(void) { const gchar close_tag[] = ">&>"; unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword[] = "Ruby"; const gchar expected[] = "y not required.)</li>\n" " <li>[[Ruby>&> 1.8.1 or later " /* */"(for [[Ruby>&> binding.)" /* */"<a class="external" " /* */"href="; default_mapping = (grn_snip_mapping *)-1; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, close_tag, strlen(close_tag))); grn_test_assert(grn_snip_exec(&context, snip, html_text, strlen(html_text), &n_results, &max_tagged_len)); cut_assert_equal_uint(1, n_results); cut_assert_equal_uint(strlen(expected) + 1, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string(expected, result); cut_assert_equal_uint(strlen(expected), result_len); }
/*
 * Runs `snip` over `text` and gathers every resulting snippet, wrapped in
 * `prefix` and `suffix`, into a GRN_DB_SHORT_TEXT vector.
 *
 * Returns:
 *   - NULL when `text` is empty, when grn_snip_exec() fails, or when the
 *     vector cannot be allocated;
 *   - an object allocated as GRN_DB_VOID when nothing matched;
 *   - the vector of snippets otherwise. A snippet whose retrieval fails is
 *     skipped silently.
 */
static grn_obj *
snippet_exec(grn_ctx *ctx, grn_obj *snip, grn_obj *text,
             grn_user_data *user_data,
             const char *prefix, int prefix_length,
             const char *suffix, int suffix_length)
{
  grn_obj work;
  grn_obj *found;
  unsigned int index;
  unsigned int n_found;
  unsigned int max_length;

  if (GRN_TEXT_LEN(text) == 0) {
    return NULL;
  }

  if (grn_snip_exec(ctx, snip,
                    GRN_TEXT_VALUE(text), GRN_TEXT_LEN(text),
                    &n_found, &max_length) != GRN_SUCCESS) {
    return NULL;
  }

  if (n_found == 0) {
    return grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  found = grn_plugin_proc_alloc(ctx, user_data,
                                GRN_DB_SHORT_TEXT, GRN_OBJ_VECTOR);
  if (!found) {
    return NULL;
  }

  /* One scratch buffer, sized once for the largest decorated snippet. */
  GRN_TEXT_INIT(&work, 0);
  grn_bulk_space(ctx, &work, prefix_length + max_length + suffix_length);

  for (index = 0; index < n_found; index++) {
    unsigned int length;

    GRN_BULK_REWIND(&work);
    if (prefix_length) {
      GRN_TEXT_PUT(ctx, &work, prefix, prefix_length);
    }

    /* The snippet is written straight after the prefix bytes. */
    if (grn_snip_get_result(ctx, snip, index,
                            GRN_TEXT_VALUE(&work) + prefix_length,
                            &length) != GRN_SUCCESS) {
      continue;
    }

    grn_strncat(GRN_TEXT_VALUE(&work),
                GRN_BULK_WSIZE(&work),
                suffix,
                suffix_length);
    grn_vector_add_element(ctx, found,
                           GRN_TEXT_VALUE(&work),
                           prefix_length + length + suffix_length,
                           0, GRN_DB_SHORT_TEXT);
  }

  GRN_OBJ_FIN(ctx, &work);

  return found;
}
void test_multi_conditions(void) { const gchar open_tag[] = "((*"; const gchar close_tag[] = "*))"; unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword1[] = "fulltext"; const gchar keyword2[] = "groonga"; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword1, strlen(keyword1), open_tag, strlen(open_tag), close_tag, strlen(close_tag))); grn_test_assert(grn_snip_add_cond(&context, snip, keyword2, strlen(keyword2), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text), &n_results, &max_tagged_len)); cut_assert_equal_uint(2, n_results); cut_assert_equal_uint(107, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("Groonga is an embeddable ((*fulltext*)) search " "engine, which you can use in\n" "conjunction with various scrip", result); cut_assert_equal_uint(106, result_len); grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len)); cut_assert_equal_string("exing to achieve fast, precise searches. While\n" "[[groonga]] codebase is rather compact it is " "scalable eno", result); cut_assert_equal_uint(104, result_len); }
void test_simple_exec_sjis(void) { GError *error = NULL; unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; keyword = utf8_to_sjis("処理", &error); cut_assert_g_error(error); default_encoding = GRN_ENC_SJIS; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text_ja_sjis, strlen(text_ja_sjis), &n_results, &max_tagged_len)); cut_assert_equal_uint(2, n_results); cut_assert_equal_uint(104, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("み型の全文検索エンジンです。" "DBMSやスクリプト言語[[処理]]系等に\n" "組み込むことによって、その全文検索機能を", take_sjis_to_utf8(result)); cut_assert_equal_uint(103, result_len); grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len)); cut_assert_equal_string("パクトな実装ですが、大規模な文書\n" "量と検索要求を[[処理]]できるように設計" "されています。また、純粋なn-gram", take_sjis_to_utf8(result)); cut_assert_equal_uint(103, result_len); }
void test_simple_exec_euc_jp(void) { GError *error = NULL; unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; keyword = utf8_to_euc_jp("検索", &error); cut_assert_g_error(error); default_encoding = GRN_ENC_EUC_JP; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text_ja_euc, strlen(text_ja_euc), &n_results, &max_tagged_len)); cut_assert_equal_uint(2, n_results); cut_assert_equal_uint(108, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("型の全文[[検索]]エンジンです。" "DBMSやスクリプト言語処理系等に\n" "組み込むことによって、その全文[[検索]]機能を強", take_euc_jp_to_utf8(result)); cut_assert_equal_uint(107, result_len); grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len)); cut_assert_equal_string("です。コンパクトな実装ですが、大規模な文書\n" "量と[[検索]]要求を処理できるように設計されて" "います。また、純", take_euc_jp_to_utf8(result)); cut_assert_equal_uint(103, result_len); }
void test_invalid_result_index(void) { unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword[] = "index"; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text), &n_results, &max_tagged_len)); cut_assert_equal_uint(1, n_results); cut_assert_equal_uint(113, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); grn_test_assert_equal_rc(GRN_INVALID_ARGUMENT, grn_snip_get_result(&context, snip, 1, result, &result_len)); }
void test_customized_tag(void) { const gchar open_tag[] = "((*"; const gchar close_tag[] = "*))"; unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword[] = "engine"; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), open_tag, strlen(open_tag), close_tag, strlen(close_tag))); grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text), &n_results, &max_tagged_len)); cut_assert_equal_uint(2, n_results); cut_assert_equal_uint(107, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("Groonga is an embeddable fulltext search " "((*engine*)), which you can use in\n" "conjunction with various scrip", result); cut_assert_equal_uint(106, result_len); grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len)); cut_assert_equal_string(" databases. Groonga is\n" "an inverted index based ((*engine*)), " "& combines the best of n-gram\n" "indexing and wo", result); cut_assert_equal_uint(106, result_len); }
/*
 * call-seq:
 *   snippet.execute(string) -> array of snippets
 *
 * Scans _string_ and creates the snippets.
 */
static VALUE
rb_grn_snippet_execute (VALUE self, VALUE rb_string)
{
    RbGrnSnippet *snippet_data;
    grn_ctx *context;
    grn_snip *snippet;
    grn_rc rc;
    char *text;
    unsigned int text_length;
    unsigned int n_snippets, buffer_size;
    unsigned int index;
    char *buffer;
    VALUE rb_snippets;

    /* Anything that is not a String is rejected up front. */
    if (TYPE(rb_string) != T_STRING) {
        rb_raise(rb_eGrnInvalidArgument,
                 "snippet text must be String: <%s>",
                 rb_grn_inspect(rb_string));
    }

    snippet_data = SELF(self);
    context = snippet_data->context;
    snippet = snippet_data->snippet;

#ifdef HAVE_RUBY_ENCODING_H
    rb_string = rb_grn_context_rb_string_encode(context, rb_string);
#endif
    text = StringValuePtr(rb_string);
    text_length = RSTRING_LEN(rb_string);

    rc = grn_snip_exec(context, snippet, text, text_length,
                       &n_snippets, &buffer_size);
    rb_grn_rc_check(rc, self);

    rb_snippets = rb_ary_new2(n_snippets);
    /* A single scratch buffer is reused for every snippet. */
    buffer = ALLOCA_N(char, buffer_size);
    for (index = 0; index < n_snippets; index++) {
        unsigned int length;

        rc = grn_snip_get_result(context, snippet, index, buffer, &length);
        rb_grn_rc_check(rc, self);
        rb_ary_push(rb_snippets,
                    rb_grn_context_rb_string_new(context, buffer, length));
    }

    return rb_snippets;
}
void test_exec_with_normalize(void) { unsigned int n_results; unsigned int max_tagged_len; unsigned int result_len; const gchar keyword[] = "転置インデックス"; default_encoding = GRN_ENC_UTF8; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text_ja_utf8, strlen(text_ja_utf8), &n_results, &max_tagged_len)); cut_assert_equal_uint(0, n_results); grn_obj_close(&context, (grn_obj *)snip); snip = NULL; default_flags = GRN_SNIP_NORMALIZE; cut_assert_open_snip(); grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword), NULL, 0, NULL, 0)); grn_test_assert(grn_snip_exec(&context, snip, text_ja_utf8, strlen(text_ja_utf8), &n_results, &max_tagged_len)); cut_assert_equal_uint(1, n_results); cut_assert_equal_uint(105, max_tagged_len); result = g_new(gchar, max_tagged_len); grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len)); cut_assert_equal_string("備えた、高速かつ高精度な[[転置\n" "インデックス]]タイプのエンジンです。コン", result); cut_assert_equal_uint(104, result_len); }
/*
 * Row function of the mroonga_snippet() UDF.
 *
 * Runs the snippet object over args->args[0] and returns the concatenation
 * of every snippet, each preceded by args->args[6] and followed by
 * args->args[7]. The returned pointer refers to snip_info->result_str and
 * its byte count is stored in *length.
 *
 * When snip_info->snippet is not set, a snippet object is prepared for this
 * call only and is closed before returning, on the success path as well as
 * on every error path taken after the preparation succeeded.
 *
 * Returns NULL with *is_null = 1 when the target text is NULL, and NULL
 * with *error = 1 on any failure.
 */
MRN_API char *mroonga_snippet(UDF_INIT *initid, UDF_ARGS *args, char *result,
                              unsigned long *length, char *is_null,
                              char *error)
{
  st_mrn_snip_info *snip_info = (st_mrn_snip_info *) initid->ptr;
  grn_ctx *ctx = &snip_info->ctx;
  String *result_str = &snip_info->result_str;
  char *target;
  unsigned int target_length;
  grn_snip *snippet = NULL;
  /* True only while this call holds a snippet it prepared itself and has
     not closed yet. */
  bool owns_snippet = false;
  grn_rc rc;
  unsigned int i, n_results, max_tagged_length, result_length;

  if (!args->args[0]) {
    *is_null = 1;
    return NULL;
  }
  *is_null = 0;
  target = args->args[0];
  target_length = args->lengths[0];

  if (!snip_info->snippet) {
    /* A per-call snippet needs every remaining argument to be non-NULL. */
    for (i = 1; i < args->arg_count; i++) {
      if (!args->args[i]) {
        my_printf_error(ER_MRN_INVALID_NULL_VALUE_NUM,
                        ER_MRN_INVALID_NULL_VALUE_STR, MYF(0),
                        "mroonga_snippet() arguments");
        goto error;
      }
    }

    if (mrn_snippet_prepare(snip_info, args, NULL, &snippet)) {
      /* Ownership is not claimed here: the state of `snippet` after a
         failed prepare is not known to this function. */
      goto error;
    }
    owns_snippet = true;
  } else {
    snippet = snip_info->snippet;
  }

  rc = grn_snip_exec(ctx, snippet, target, target_length,
                     &n_results, &max_tagged_length);
  if (rc) {
    my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM,
                    ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf);
    goto error;
  }

  result_str->length(0);
  /* Reserve room for every snippet plus its prefix and suffix up front so
     that grn_snip_get_result() can write directly into the string. */
  if (result_str->reserve((args->lengths[6] + args->lengths[7] +
                           max_tagged_length) * n_results)) {
    my_error(ER_OUT_OF_RESOURCES, MYF(0), HA_ERR_OUT_OF_MEM);
    goto error;
  }

  for (i = 0; i < n_results; i++) {
    result_str->q_append(args->args[6], args->lengths[6]);

    rc = grn_snip_get_result(ctx, snippet, i,
                             (char *) result_str->ptr() + result_str->length(),
                             &result_length);
    if (rc) {
      my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM,
                      ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf);
      goto error;
    }
    /* Account for the bytes written behind the string's back. */
    result_str->length(result_str->length() + result_length);

    result_str->q_append(args->args[7], args->lengths[7]);
  }

  if (!snip_info->snippet) {
    /* Give up ownership first so that a failing close is not retried in
       the error path. */
    owns_snippet = false;
    rc = grn_snip_close(ctx, snippet);
    if (rc) {
      my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM,
                      ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf);
      goto error;
    }
  }

  *length = result_str->length();
  return (char *) result_str->ptr();

error:
  /* Release a snippet that was prepared for this call only. */
  if (owns_snippet && !snip_info->snippet) {
    grn_snip_close(ctx, snippet);
  }
  *error = 1;
  return NULL;
}
/*
 * Row function of the mroonga_snippet_html() UDF.
 *
 * Runs the snippet object over args->args[0] and returns every snippet
 * wrapped in <div class="snippet">...</div>, accumulated in info->result.
 * When info->snippet is not set, a snippet object is prepared for this call
 * only and closed again before returning (also on the error path).
 *
 * Returns NULL with *is_null = 1 when the target text is NULL, and NULL
 * with both *is_null and *error set to 1 on failure.
 */
MRN_API char *mroonga_snippet_html(UDF_INIT *init, UDF_ARGS *args,
                                   char *result, unsigned long *length,
                                   char *is_null, char *error)
{
  MRN_DBUG_ENTER_FUNCTION();

  mrn_snippet_html_info *info =
    reinterpret_cast<mrn_snippet_html_info *>(init->ptr);
  grn_ctx *ctx = info->ctx;
  grn_obj *snippet = info->snippet;
  grn_obj *output = &(info->result);

  if (!args->args[0]) {
    *is_null = 1;
    DBUG_RETURN(NULL);
  }

  if (!snippet && mrn_snippet_html_prepare(info, args, NULL, &snippet)) {
    goto error;
  }

  /* Everything initialized from here on lives in its own block so that the
     jumps to `error` never cross an initialization. */
  {
    char *text = args->args[0];
    unsigned int text_length = args->lengths[0];
    unsigned int n_snippets, max_snippet_length;

    if (grn_snip_exec(ctx, snippet, text, text_length,
                      &n_snippets, &max_snippet_length) != GRN_SUCCESS) {
      my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM,
                      ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf);
      goto error;
    }

    *is_null = 0;
    GRN_BULK_REWIND(output);

    {
      const char *open_div = "<div class=\"snippet\">";
      const char *close_div = "</div>";
      size_t open_div_length = strlen(open_div);
      size_t close_div_length = strlen(close_div);

      for (unsigned int index = 0; index < n_snippets; ++index) {
        GRN_TEXT_PUT(ctx, output, open_div, open_div_length);

        /* Make room first, let Groonga write in place, then advance the
           bulk by the number of bytes actually produced. */
        grn_bulk_reserve(ctx, output, max_snippet_length);
        unsigned int written;
        grn_rc get_rc = grn_snip_get_result(ctx, snippet, index,
                                            GRN_BULK_CURR(output),
                                            &written);
        if (get_rc) {
          my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM,
                          ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf);
          goto error;
        }
        grn_bulk_space(ctx, output, written);

        GRN_TEXT_PUT(ctx, output, close_div, close_div_length);
      }
    }

    /* A snippet prepared just for this call is closed again here. */
    if (!info->snippet) {
      if (grn_obj_close(ctx, snippet) != GRN_SUCCESS) {
        my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM,
                        ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf);
        goto error;
      }
    }
  }

  *length = GRN_TEXT_LEN(output);
  DBUG_RETURN(GRN_TEXT_VALUE(output));

error:
  if (!info->snippet && snippet) {
    grn_obj_close(ctx, snippet);
  }

  *is_null = 1;
  *error = 1;

  DBUG_RETURN(NULL);
}