Beispiel #1
0
/*
 * With GRN_SNIP_COPY_TAG set, registers the same keyword four times, once for
 * each combination of present/absent (NULL, 0) open and close tags, and
 * expects every grn_snip_add_cond() call to pass grn_test_assert().
 */
void
test_add_cond_with_copy_tag(void)
{
  const gchar keyword[] = "Groonga";
  unsigned int keyword_len;
  const gchar open_tag[] = "<<";
  const gchar close_tag[] = ">>";
  unsigned int open_tag_len, close_tag_len;

  keyword_len = strlen(keyword);
  open_tag_len = strlen(open_tag);
  close_tag_len = strlen(close_tag);

  /* Presumably picked up by cut_assert_open_snip() below -- TODO confirm. */
  default_flags = GRN_SNIP_COPY_TAG;

  cut_assert_open_snip();

  /* Both tags given. */
  grn_test_assert(grn_snip_add_cond(&context, snip,
                                    keyword, keyword_len,
                                    open_tag, open_tag_len,
                                    close_tag, close_tag_len));
  /* Open tag only. */
  grn_test_assert(grn_snip_add_cond(&context, snip,
                                    keyword, keyword_len,
                                    open_tag, open_tag_len,
                                    NULL, 0));
  /* Close tag only. */
  grn_test_assert(grn_snip_add_cond(&context, snip,
                                    keyword, keyword_len,
                                    NULL, 0,
                                    close_tag, close_tag_len));
  /* Neither tag given. */
  grn_test_assert(grn_snip_add_cond(&context, snip,
                                    keyword, keyword_len,
                                    NULL, 0,
                                    NULL, 0));
}
Beispiel #2
0
/*
 * Runs a snippet search for "indexing" with a custom open tag and the
 * (grn_snip_mapping *)-1 mapping, then checks the single result: the expected
 * text shows '&' rendered as "&amp;" and the keyword wrapped in "<<" ... "]]".
 */
void
test_html_mapping(void)
{
  const gchar open_tag[] = "<<";
  unsigned int n_results;
  unsigned int max_tagged_len;
  unsigned int result_len;
  const gchar keyword[] = "indexing";

  /* Sentinel mapping value; presumably consumed by cut_assert_open_snip(). */
  default_mapping = (grn_snip_mapping *)-1;
  cut_assert_open_snip();
  /* No close tag is passed; the expected output below still ends matches
   * with "]]" (presumably the snip's default close tag). */
  grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword),
                                    open_tag, strlen(open_tag), NULL, 0));

  grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text),
                                &n_results, &max_tagged_len));
  cut_assert_equal_uint(1, n_results);
  cut_assert_equal_uint(113, max_tagged_len);
  /* The result buffer is sized from max_tagged_len, which is one larger than
   * the asserted result_len (presumably room for the terminating NUL). */
  result = g_new(gchar, max_tagged_len);

  grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len));
  cut_assert_equal_string("ngine, &amp; combines the best of n-gram\n"
                          "<<indexing]] and word <<indexing]] to achieve fast, "
                          "precise searches. W",
                          result);
  cut_assert_equal_uint(112, result_len);
}
Beispiel #3
0
/*
 * Searches for the one-character keyword "x" without explicit tags and checks
 * both returned snippets; every occurrence of "x" inside a snippet is expected
 * to be wrapped as "[[x]]".
 */
void
test_exec_with_one_length_keyword(void)
{
  unsigned int n_results;
  unsigned int max_tagged_len;
  unsigned int result_len;
  const gchar keyword[] = "x";

  cut_assert_open_snip();
  grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword),
                                    NULL, 0, NULL, 0));

  grn_test_assert(grn_snip_exec(&context, snip,
                                text, strlen(text),
                                &n_results, &max_tagged_len));
  cut_assert_equal_uint(2, n_results);
  cut_assert_equal_uint(113, max_tagged_len);
  /* One buffer sized for the largest tagged snippet is reused for both. */
  result = g_new(gchar, max_tagged_len);

  /* First snippet: a single match. */
  grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len));
  cut_assert_equal_string("Groonga is an embeddable fullte[[x]]t search "
                          "engine, which you can use in\n"
                          "conjunction with various scrip",
                          result);
  cut_assert_equal_uint(104, result_len);

  /* Second snippet: three matches inside one snippet. */
  grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len));
  cut_assert_equal_string("an inverted inde[[x]] based engine, & "
                          "combines the best of n-gram\n"
                          "inde[[x]]ing and word inde[[x]]ing to achieve ",
                          result);
  cut_assert_equal_uint(112, result_len);
}
Beispiel #4
0
/*
 * Data-driven test: reads "flags" and "expected" from the supplied test data,
 * searches for "embeddable" with those flags under UTF-8, and checks that the
 * single result equals the expected string and length.
 */
void
test_proper_tag_insertion(gconstpointer data)
{
  unsigned int n_results;
  unsigned int max_tagged_len;
  const gchar keyword[] = "embeddable";
  const gchar *expected;
  /* NOTE(review): this local buffer is allocated below and never freed in
   * this function -- confirm whether that is intended. */
  gchar *result;
  unsigned int text_len, keyword_len, result_len, expected_len;

  default_encoding = GRN_ENC_UTF8;
  default_flags = gcut_data_get_int(data, "flags");

  text_len = strlen(text);
  keyword_len = strlen(keyword);
  expected = gcut_data_get_string(data, "expected");
  expected_len = strlen(expected);

  cut_assert_open_snip();
  grn_test_assert(grn_snip_add_cond(&context, snip, keyword, keyword_len,
                                    NULL, 0, NULL, 0));

  grn_test_assert(grn_snip_exec(&context, snip, text, text_len, &n_results,
                                &max_tagged_len));
  cut_assert_equal_uint(1, n_results);
  /* max_tagged_len is expected to be the result length plus one. */
  cut_assert_equal_uint(expected_len + 1, max_tagged_len);
  result = g_new(gchar, max_tagged_len);

  grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len));
  cut_assert_equal_string(expected, result);
  cut_assert_equal_uint(expected_len, result_len);
}
Beispiel #5
0
/*
 * With GRN_SNIP_NORMALIZE and UTF-8 encoding, searches for "ab" in a short
 * text that starts with non-ASCII characters and checks that the tags are
 * placed around "ab" while the leading characters are returned unchanged.
 */
void
test_exec_composed_decomposed_normalize_utf8(void)
{
  unsigned int n_results;
  unsigned int max_tagged_len;
  unsigned int result_len;
  /* Local input text for this test only. */
  const gchar text[] = "Ⅶ¨abcde";
  const gchar keyword[] = "ab";

  default_encoding = GRN_ENC_UTF8;
  default_flags = GRN_SNIP_NORMALIZE;

  cut_assert_open_snip();
  grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword),
                                    NULL, 0, NULL, 0));

  grn_test_assert(grn_snip_exec(&context, snip,
                                text, strlen(text),
                                &n_results, &max_tagged_len));
  cut_assert_equal_uint(1, n_results);
  cut_assert_equal_uint(15, max_tagged_len);
  result = g_new(gchar, max_tagged_len);

  grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len));
  cut_assert_equal_string("Ⅶ¨[[ab]]cde",
                          result);
  cut_assert_equal_uint(14, result_len);
}
Beispiel #6
0
/*
 * Searches the UTF-8 Japanese sample text for a Japanese keyword without
 * explicit tags and checks both returned snippets and their byte lengths.
 */
void
test_simple_exec_utf8(void)
{
  unsigned int n_results;
  unsigned int max_tagged_len;
  unsigned int result_len;
  const gchar keyword[] = "エンジン";

  default_encoding = GRN_ENC_UTF8;

  cut_assert_open_snip();
  grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword),
                                    NULL, 0, NULL, 0));

  grn_test_assert(grn_snip_exec(&context, snip,
                                text_ja_utf8, strlen(text_ja_utf8),
                                &n_results, &max_tagged_len));
  cut_assert_equal_uint(2, n_results);
  cut_assert_equal_uint(105, max_tagged_len);
  /* One buffer sized for the largest tagged snippet is reused for both. */
  result = g_new(gchar, max_tagged_len);

  /* First snippet. */
  grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len));
  cut_assert_equal_string("Groongaは組み込み型の全文検索[[エンジン]]です。"
                          "DBMSやスクリプト言語処理系",
                          result);
  cut_assert_equal_uint(102, result_len);

  /* Second snippet. */
  grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len));
  cut_assert_equal_string("度な転置\n"
                          "インデックスタイプの[[エンジン]]です。"
                          "コンパクトな実装ですが、",
                          result);
  cut_assert_equal_uint(104, result_len);
}
Beispiel #7
0
/*
 * Baseline snippet test: searches the sample text for "Groonga" without
 * explicit tags and checks both returned snippets, in which the keyword is
 * expected to be wrapped as "[[Groonga]]".
 */
void
test_simple_exec(void)
{
  unsigned int n_results;
  unsigned int max_tagged_len;
  unsigned int result_len;
  const gchar keyword[] = "Groonga";

  cut_assert_open_snip();
  grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword),
                                    NULL, 0, NULL, 0));

  grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text),
                                &n_results, &max_tagged_len));
  cut_assert_equal_uint(2, n_results);
  cut_assert_equal_uint(105, max_tagged_len);
  /* One buffer sized for the largest tagged snippet is reused for both. */
  result = g_new(gchar, max_tagged_len);

  /* First snippet. */
  grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len));
  cut_assert_equal_string("[[Groonga]] is an embeddable fulltext search engine, "
                          "which you can use in\n"
                          "conjunction with various scrip",
                          result);
  cut_assert_equal_uint(104, result_len);

  /* Second snippet. */
  grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len));
  cut_assert_equal_string("ting languages and databases. [[Groonga]] is\n"
                          "an inverted index based engine, & combines "
                          "the best of n-gr",
                          result);
  cut_assert_equal_uint(104, result_len);
}
Beispiel #8
0
/*
 * Searches html_text for "Ruby" with the (grn_snip_mapping *)-1 mapping and a
 * custom close tag. The expected text contains &lt; / &gt; / &quot; entities
 * for the surrounding text, while the close tag ">&>" appears verbatim.
 */
void
test_html_mapping_escape(void)
{
  const gchar close_tag[] = ">&>";
  unsigned int n_results;
  unsigned int max_tagged_len;
  unsigned int result_len;
  const gchar keyword[] = "Ruby";
  /* The empty-looking comments below only align the literal pieces. */
  const gchar expected[] =
    "y not required.)&lt;/li&gt;\n"
    "          &lt;li&gt;[[Ruby>&> 1.8.1 or later "
    /*                */"(for [[Ruby>&> binding.)"
    /*                */"&lt;a class=&quot;external&quot; "
    /*                      */"href=";

  /* Sentinel mapping value; presumably consumed by cut_assert_open_snip(). */
  default_mapping = (grn_snip_mapping *)-1;
  cut_assert_open_snip();
  /* Only the close tag is customized; the open tag is left as NULL. */
  grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword),
                                    NULL, 0, close_tag, strlen(close_tag)));

  grn_test_assert(grn_snip_exec(&context, snip, html_text, strlen(html_text),
                                &n_results, &max_tagged_len));
  cut_assert_equal_uint(1, n_results);
  /* max_tagged_len is expected to be the result length plus one. */
  cut_assert_equal_uint(strlen(expected) + 1, max_tagged_len);
  result = g_new(gchar, max_tagged_len);

  grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len));
  cut_assert_equal_string(expected, result);
  cut_assert_equal_uint(strlen(expected), result_len);
}
Beispiel #9
0
/*
 * Runs the same keyword search twice over the UTF-8 Japanese sample text:
 * first with the flags left untouched by this function (0 results expected),
 * then with GRN_SNIP_NORMALIZE (1 result expected, where the tagged match
 * spans a line break in the text).
 */
void
test_exec_with_normalize(void)
{
  unsigned int n_results;
  unsigned int max_tagged_len;
  unsigned int result_len;
  const gchar keyword[] = "転置インデックス";

  default_encoding = GRN_ENC_UTF8;

  /* First pass: no normalize flag set here; no match is expected. */
  cut_assert_open_snip();
  grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword),
                                    NULL, 0, NULL, 0));

  grn_test_assert(grn_snip_exec(&context, snip,
                                text_ja_utf8, strlen(text_ja_utf8),
                                &n_results, &max_tagged_len));
  cut_assert_equal_uint(0, n_results);

  /* Close the first snip and clear the pointer before opening a new one. */
  grn_obj_close(&context, (grn_obj *)snip);
  snip = NULL;


  /* Second pass: same keyword and text, now with normalization enabled. */
  default_flags = GRN_SNIP_NORMALIZE;

  cut_assert_open_snip();
  grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword),
                                    NULL, 0, NULL, 0));

  grn_test_assert(grn_snip_exec(&context, snip,
                                text_ja_utf8, strlen(text_ja_utf8),
                                &n_results, &max_tagged_len));
  cut_assert_equal_uint(1, n_results);
  cut_assert_equal_uint(105, max_tagged_len);
  result = g_new(gchar, max_tagged_len);

  grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len));
  cut_assert_equal_string("備えた、高速かつ高精度な[[転置\n"
                          "インデックス]]タイプのエンジンです。コン",
                          result);
  cut_assert_equal_uint(104, result_len);
}
Beispiel #10
0
/*
 * Registers two keywords on one snip -- "fulltext" with custom "((*" / "*))"
 * tags and "groonga" with no tags given -- and checks that each result uses
 * the tags belonging to the keyword it matched ("[[" / "]]" for the second).
 */
void
test_multi_conditions(void)
{
  const gchar open_tag[] = "((*";
  const gchar close_tag[] = "*))";
  unsigned int n_results;
  unsigned int max_tagged_len;
  unsigned int result_len;
  const gchar keyword1[] = "fulltext";
  const gchar keyword2[] = "groonga";

  cut_assert_open_snip();
  /* Condition 1: custom tags. */
  grn_test_assert(grn_snip_add_cond(&context, snip, keyword1, strlen(keyword1),
                                    open_tag, strlen(open_tag),
                                    close_tag, strlen(close_tag)));
  /* Condition 2: NULL tags. */
  grn_test_assert(grn_snip_add_cond(&context, snip, keyword2, strlen(keyword2),
                                    NULL, 0, NULL, 0));

  grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text),
                                &n_results, &max_tagged_len));
  cut_assert_equal_uint(2, n_results);
  cut_assert_equal_uint(107, max_tagged_len);
  /* One buffer sized for the largest tagged snippet is reused for both. */
  result = g_new(gchar, max_tagged_len);

  /* First snippet: matched by keyword1, wrapped in the custom tags. */
  grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len));
  cut_assert_equal_string("Groonga is an embeddable ((*fulltext*)) search "
                          "engine, which you can use in\n"
                          "conjunction with various scrip",
                          result);
  cut_assert_equal_uint(106, result_len);

  /* Second snippet: matched by keyword2, wrapped in "[[" / "]]". */
  grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len));
  cut_assert_equal_string("exing to achieve fast, precise searches. While\n"
                          "[[groonga]] codebase is rather compact it is "
                          "scalable eno",
                          result);
  cut_assert_equal_uint(104, result_len);
}
/*
 * Snippet function taking positional arguments.
 *
 * Arguments as read by this function:
 *   args[0]   text to extract snippets from
 *   args[1]   snippet width (read with GRN_UINT64_VALUE)
 *   args[2]   maximum number of results (read with GRN_UINT64_VALUE)
 *   args[3]   normalizer name; no normalizer is set when it is empty
 *   args[4]   non-zero adds GRN_SNIP_SKIP_LEADING_SPACES
 *   args[5]   non-zero selects GRN_SNIP_MAPPING_HTML_ESCAPE
 *   args[6..7] not read here; args is passed on to snippet_exec()
 *   args[8..] (keyword, open tag, close tag) triples
 *
 * At least 11 arguments are required. Returns whatever snippet_exec()
 * produced, or a GRN_DB_VOID object from grn_plugin_proc_alloc() when there
 * are too few arguments, the snip could not be opened, or snippet_exec()
 * returned NULL.
 */
static grn_obj *
func_snippet_tritonn(grn_ctx *ctx, int nargs, grn_obj **args,
                     grn_user_data *user_data)
{
  grn_obj *snippets = NULL;

  if (nargs > 10) {
    grn_obj *text = args[0];
    grn_obj *snip = NULL;
    unsigned int width = GRN_UINT64_VALUE(args[1]);
    unsigned int max_n_results = GRN_UINT64_VALUE(args[2]);
    grn_snip_mapping *mapping = NULL;

    int flags = GRN_SNIP_COPY_TAG;

    if (GRN_UINT64_VALUE(args[4])) {
      flags |= GRN_SNIP_SKIP_LEADING_SPACES;
    }
    if (GRN_UINT64_VALUE(args[5])) {
      mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;
    }
    snip = grn_snip_open(ctx, flags, width, max_n_results, "", 0, "", 0, mapping);

    if (snip) {
      unsigned int i;

      if (GRN_TEXT_LEN(args[3])) {
        grn_obj *normalizer;
        normalizer = grn_ctx_get(ctx, GRN_TEXT_VALUE(args[3]), GRN_TEXT_LEN(args[3]));
        grn_snip_set_normalizer(ctx, snip, normalizer);
      }

      /*
       * Consume complete (keyword, open tag, close tag) triples only. The
       * bound is on the last index of the triple so that a trailing partial
       * triple never reads past args[nargs - 1].
       */
      for (i = 8; i + 2 < (unsigned int)nargs; i += 3) {
        /* The return code is not inspected: a rejected condition is simply
         * not registered and the remaining triples are still processed. */
        grn_snip_add_cond(ctx, snip,
                          GRN_TEXT_VALUE(args[i]), GRN_TEXT_LEN(args[i]),
                          GRN_TEXT_VALUE(args[i + 1]), GRN_TEXT_LEN(args[i + 1]),
                          GRN_TEXT_VALUE(args[i + 2]), GRN_TEXT_LEN(args[i + 2]));
      }

      snippets = snippet_exec(ctx, snip, text, user_data, args);
      grn_obj_close(ctx, snip);
    }
  }
  if (!snippets) {
    snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return snippets;
}
Beispiel #12
0
/*
 * Checks the keyword length limit of grn_snip_add_cond() relative to
 * default_width: the whole text (longer than default_width) is rejected, the
 * last default_width bytes of it are accepted, and one byte more is rejected.
 */
void
test_add_cond_with_too_large_keyword(void)
{
  const gchar *sub_text;

  cut_assert_open_snip();

  /* Precondition: the sample text is longer than default_width. */
  cut_assert_operator_int(strlen(text), >, default_width);
  grn_test_assert_equal_rc(GRN_INVALID_ARGUMENT,
                           grn_snip_add_cond(&context, snip,
                                             text, strlen(text),
                                             NULL, 0, NULL, 0));

  /* Tail of the text that is exactly default_width bytes long. */
  sub_text = text + strlen(text) - default_width;
  grn_test_assert(grn_snip_add_cond(&context, snip,
                                    sub_text, strlen(sub_text),
                                    NULL, 0, NULL, 0));

  /* Step back one byte: default_width + 1 bytes must be rejected. */
  sub_text--;
  grn_test_assert_equal_rc(GRN_INVALID_ARGUMENT,
                           grn_snip_add_cond(&context, snip,
                                             sub_text, strlen(sub_text),
                                             NULL, 0, NULL, 0));
}
Beispiel #13
0
/*
 * Checks the argument validation of grn_snip_add_cond(): a NULL snip, a NULL
 * keyword and a zero keyword length are each expected to yield
 * GRN_INVALID_ARGUMENT, and so is the condition added after max_n_conds
 * conditions have been registered successfully.
 */
void
test_add_cond_with_invalid_argument(void)
{
  unsigned int n_conds = 0, max_n_conds = 32U;
  const gchar keyword[] = "Groonga";
  unsigned int keyword_len;
  const gchar open_tag[] = "<<";
  const gchar close_tag[] = ">>";
  unsigned int open_tag_len, close_tag_len;

  keyword_len = strlen(keyword);
  open_tag_len = strlen(open_tag);
  close_tag_len = strlen(close_tag);

  cut_assert_open_snip();

  /* A valid condition first; it counts towards the limit checked below. */
  grn_test_assert(grn_snip_add_cond(&context, snip,
                                    keyword, keyword_len,
                                    open_tag, open_tag_len,
                                    close_tag, close_tag_len));
  n_conds++;

  /* NULL snip. */
  grn_test_assert_equal_rc(GRN_INVALID_ARGUMENT,
                           grn_snip_add_cond(&context, NULL,
                                             keyword, keyword_len,
                                             open_tag, open_tag_len,
                                             close_tag, close_tag_len));
  /* NULL keyword. */
  grn_test_assert_equal_rc(GRN_INVALID_ARGUMENT,
                           grn_snip_add_cond(&context, snip,
                                             NULL, keyword_len,
                                             open_tag, open_tag_len,
                                             close_tag, close_tag_len));
  /* Empty keyword. */
  grn_test_assert_equal_rc(GRN_INVALID_ARGUMENT,
                           grn_snip_add_cond(&context, snip,
                                             keyword, 0,
                                             open_tag, open_tag_len,
                                             close_tag, close_tag_len));

  /* The rejected calls above are not counted in n_conds; keep adding valid
   * conditions until max_n_conds have been registered. */
  while (n_conds < max_n_conds) {
    /* n_conds is unsigned, hence %u in the message format. */
    grn_test_assert(grn_snip_add_cond(&context, snip,
                                      keyword, keyword_len,
                                      open_tag, open_tag_len,
                                      close_tag, close_tag_len),
                    cut_message("cond #%u", n_conds));
    n_conds++;
  }

  /* One condition beyond the limit must be rejected. */
  grn_test_assert_equal_rc(GRN_INVALID_ARGUMENT,
                           grn_snip_add_cond(&context, snip,
                                             keyword, keyword_len,
                                             open_tag, open_tag_len,
                                             close_tag, close_tag_len),
                           cut_message("cond #%u", n_conds));
}
Beispiel #14
0
/* TODO: delete overlapping logic with exec_query */
/*
 * Walks the query cell list `c` and registers a snippet condition for each
 * string cell whose BUT-polarity matches `c_but`, recursing into nested
 * lists.
 *
 * The tag pair for each condition is picked round-robin from the
 * opentags/closetags arrays using snip->cond_len % n_tags.
 *
 * `op` is not read by this function.
 *
 * Returns GRN_SUCCESS when the whole list was processed, the grn_rc of the
 * first grn_snip_add_cond() failure (including failures inside nested lists),
 * or GRN_INVALID_ARGUMENT when a condition has to be added but n_tags is 0.
 */
static grn_rc
snip_query(grn_ctx *ctx, grn_query *q, grn_snip *snip, grn_cell *c, grn_operator op,
           unsigned int n_tags, int c_but,
           const char **opentags, unsigned int *opentag_lens,
           const char **closetags, unsigned int *closetag_lens)
{
  grn_cell *e, *ope = NIL;
  /* The first element is evaluated with OR; afterwards opp points at op1,
   * which holds either the preceding operator cell's op or the default. */
  grn_operator op0 = GRN_OP_OR, *opp = &op0, op1 = q->default_op;
  while (c != NIL) {
    POP(e, c);
    switch (e->header.type) {
    case GRN_CELL_OP :
      /* Remember the operator for the next non-operator cell. */
      ope = e;
      op1 = ope->u.op.op;
      continue;
    case GRN_CELL_STR :
      if (ope != NIL) {
        q->opt.mode = ope->u.op.mode == -1 ? q->default_mode : ope->u.op.mode;
      } else {
        q->opt.mode = q->default_mode;
      }
      if (!(c_but ^ (*opp == GRN_OP_BUT))) {
        grn_rc rc;
        unsigned int i;
        /* Without any tag pair there is nothing to index into, and the
         * modulo below would divide by zero. */
        if (n_tags == 0) {
          return GRN_INVALID_ARGUMENT;
        }
        i = snip->cond_len % n_tags;
        if ((rc = grn_snip_add_cond(ctx, snip, e->u.b.value, e->u.b.size,
                                    opentags[i], opentag_lens[i],
                                    closetags[i], closetag_lens[i]))) {
          return rc;
        }
      }
      break;
    case GRN_CELL_LIST :
      {
        /* Propagate failures from nested lists the same way as failures of
         * a direct grn_snip_add_cond() call. */
        grn_rc rc = snip_query(ctx, q, snip, e, *opp, n_tags,
                               (*opp == GRN_OP_BUT) ? c_but ^ 1 : c_but,
                               opentags, opentag_lens, closetags, closetag_lens);
        if (rc) {
          return rc;
        }
      }
      break;
    default :
      GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid object assigned in query!! (%d)", e->header.type);
      break;
    }
    /* Reset the operator state for the next element. */
    opp = &op1;
    ope = NIL;
    op1 = q->default_op;
  }
  return GRN_SUCCESS;
}
Beispiel #15
0
/*
 * call-seq:
 *   snippet.add_keyword(keyword, options={})
 *
 * Adds _keyword_ to the snippet. The following values can be
 * given in _options_.
 *
 * [+:open_tag+]
 *   The opening tag. When omitted, the +:default_open_tag+
 *   given to Groonga::Snippet.new is used.
 *
 * [+:close_tag+]
 *   The closing tag. When omitted, the +:default_close_tag+
 *   given to Groonga::Snippet.new is used.
 */
static VALUE
rb_grn_snippet_add_keyword (int argc, VALUE *argv, VALUE self)
{
    RbGrnSnippet *snippet_data;
    VALUE rb_keyword, rb_options;
    VALUE rb_open_tag, rb_close_tag;
    char *keyword_ptr;
    char *open_tag_ptr;
    char *close_tag_ptr;
    unsigned int keyword_size;
    unsigned int open_tag_size;
    unsigned int close_tag_size;
    grn_rc status;

    /* One required argument (keyword) and one optional (options). */
    rb_scan_args(argc, argv, "11", &rb_keyword, &rb_options);

    snippet_data = SELF(self);

    keyword_ptr = StringValuePtr(rb_keyword);
    keyword_size = RSTRING_LEN(rb_keyword);

    rb_grn_scan_options(rb_options,
                        "open_tag", &rb_open_tag,
                        "close_tag", &rb_close_tag,
                        NULL);

    /* A nil tag is passed down as (NULL, 0). */
    if (NIL_P(rb_open_tag)) {
        open_tag_ptr = NULL;
        open_tag_size = 0;
    } else {
        open_tag_ptr = StringValuePtr(rb_open_tag);
        open_tag_size = RSTRING_LEN(rb_open_tag);
    }

    if (NIL_P(rb_close_tag)) {
        close_tag_ptr = NULL;
        close_tag_size = 0;
    } else {
        close_tag_ptr = StringValuePtr(rb_close_tag);
        close_tag_size = RSTRING_LEN(rb_close_tag);
    }

    status = grn_snip_add_cond(snippet_data->context,
                               snippet_data->snippet,
                               keyword_ptr, keyword_size,
                               open_tag_ptr, open_tag_size,
                               close_tag_ptr, close_tag_size);
    rb_grn_rc_check(status, self);

    return Qnil;
}
Beispiel #16
0
/*
 * EUC-JP variant of the simple snippet test: the keyword is converted with
 * utf8_to_euc_jp(), the EUC-JP sample text is searched, and each result is
 * passed through take_euc_jp_to_utf8() before being compared with the
 * expected literals.
 */
void
test_simple_exec_euc_jp(void)
{
  GError *error = NULL;
  unsigned int n_results;
  unsigned int max_tagged_len;
  unsigned int result_len;

  /* keyword is not declared in this function. */
  keyword = utf8_to_euc_jp("検索", &error);
  cut_assert_g_error(error);

  default_encoding = GRN_ENC_EUC_JP;

  cut_assert_open_snip();
  grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword),
                                    NULL, 0, NULL, 0));

  grn_test_assert(grn_snip_exec(&context, snip,
                                text_ja_euc, strlen(text_ja_euc),
                                &n_results, &max_tagged_len));
  cut_assert_equal_uint(2, n_results);
  cut_assert_equal_uint(108, max_tagged_len);
  /* One buffer sized for the largest tagged snippet is reused for both. */
  result = g_new(gchar, max_tagged_len);

  /* First snippet: two matches. result_len is the length of the raw result
   * buffer, not of the converted string. */
  grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len));
  cut_assert_equal_string("型の全文[[検索]]エンジンです。"
                          "DBMSやスクリプト言語処理系等に\n"
                          "組み込むことによって、その全文[[検索]]機能を強",
                          take_euc_jp_to_utf8(result));
  cut_assert_equal_uint(107, result_len);

  /* Second snippet: one match. */
  grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len));
  cut_assert_equal_string("です。コンパクトな実装ですが、大規模な文書\n"
                          "量と[[検索]]要求を処理できるように設計されて"
                          "います。また、純",
                          take_euc_jp_to_utf8(result));
  cut_assert_equal_uint(103, result_len);
}
Beispiel #17
0
/*
 * Shift_JIS variant of the simple snippet test: the keyword is converted with
 * utf8_to_sjis(), the Shift_JIS sample text is searched, and each result is
 * passed through take_sjis_to_utf8() before being compared with the expected
 * literals.
 */
void
test_simple_exec_sjis(void)
{
  GError *error = NULL;
  unsigned int n_results;
  unsigned int max_tagged_len;
  unsigned int result_len;

  /* keyword is not declared in this function. */
  keyword = utf8_to_sjis("処理", &error);
  cut_assert_g_error(error);

  default_encoding = GRN_ENC_SJIS;

  cut_assert_open_snip();
  grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword),
                                    NULL, 0, NULL, 0));

  grn_test_assert(grn_snip_exec(&context, snip,
                                text_ja_sjis, strlen(text_ja_sjis),
                                &n_results, &max_tagged_len));
  cut_assert_equal_uint(2, n_results);
  cut_assert_equal_uint(104, max_tagged_len);
  /* One buffer sized for the largest tagged snippet is reused for both. */
  result = g_new(gchar, max_tagged_len);

  /* First snippet. result_len is the length of the raw result buffer, not
   * of the converted string. */
  grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len));
  cut_assert_equal_string("み型の全文検索エンジンです。"
                          "DBMSやスクリプト言語[[処理]]系等に\n"
                          "組み込むことによって、その全文検索機能を",
                          take_sjis_to_utf8(result));
  cut_assert_equal_uint(103, result_len);

  /* Second snippet. */
  grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len));
  cut_assert_equal_string("パクトな実装ですが、大規模な文書\n"
                          "量と検索要求を[[処理]]できるように設計"
                          "されています。また、純粋なn-gram",
                          take_sjis_to_utf8(result));
  cut_assert_equal_uint(103, result_len);
}
Beispiel #18
0
/*
 * Checks that grn_snip_get_result() accepts index 0 when exactly one result
 * exists and reports GRN_INVALID_ARGUMENT for index 1.
 */
void
test_invalid_result_index(void)
{
  unsigned int n_results;
  unsigned int max_tagged_len;
  unsigned int result_len;
  const gchar keyword[] = "index";

  cut_assert_open_snip();
  grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword),
                                    NULL, 0, NULL, 0));

  grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text),
                                &n_results, &max_tagged_len));
  cut_assert_equal_uint(1, n_results);
  cut_assert_equal_uint(113, max_tagged_len);
  result = g_new(gchar, max_tagged_len);

  /* Valid index. */
  grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len));
  /* Index equal to n_results is out of range. */
  grn_test_assert_equal_rc(GRN_INVALID_ARGUMENT,
                           grn_snip_get_result(&context, snip, 1,
                                               result, &result_len));
}
Beispiel #19
0
/*
 * Searches the sample text for "engine" with custom "((*" / "*))" tags and
 * checks that both returned snippets wrap the keyword in those tags.
 */
void
test_customized_tag(void)
{
  const gchar open_tag[] = "((*";
  const gchar close_tag[] = "*))";
  unsigned int n_results;
  unsigned int max_tagged_len;
  unsigned int result_len;
  const gchar keyword[] = "engine";

  cut_assert_open_snip();
  grn_test_assert(grn_snip_add_cond(&context, snip, keyword, strlen(keyword),
                                    open_tag, strlen(open_tag),
                                    close_tag, strlen(close_tag)));

  grn_test_assert(grn_snip_exec(&context, snip, text, strlen(text),
                                &n_results, &max_tagged_len));
  cut_assert_equal_uint(2, n_results);
  cut_assert_equal_uint(107, max_tagged_len);
  /* One buffer sized for the largest tagged snippet is reused for both. */
  result = g_new(gchar, max_tagged_len);

  /* First snippet. */
  grn_test_assert(grn_snip_get_result(&context, snip, 0, result, &result_len));
  cut_assert_equal_string("Groonga is an embeddable fulltext search "
                          "((*engine*)), which you can use in\n"
                          "conjunction with various scrip",
                          result);
  cut_assert_equal_uint(106, result_len);

  /* Second snippet. */
  grn_test_assert(grn_snip_get_result(&context, snip, 1, result, &result_len));
  cut_assert_equal_string(" databases. Groonga is\n"
                          "an inverted index based ((*engine*)), "
                          "& combines the best of n-gram\n"
                          "indexing and wo",
                          result);
  cut_assert_equal_uint(106, result_len);
}
Beispiel #20
0
/*
 * Opens a grn_snip configured from the UDF arguments and registers the
 * keyword conditions on it.
 *
 * Arguments as read by this function:
 *   args[1]   maximum snippet length
 *   args[2]   maximum number of snippets
 *   args[3]   charset, either by name (STRING_RESULT) or by numeric ID
 *   args[4]   non-zero adds GRN_SNIP_SKIP_LEADING_SPACES
 *   args[5]   non-zero selects the (grn_snip_mapping *)-1 mapping
 *   args[8..] (keyword, open tag, close tag) triples
 * args[0], args[6] and args[7] are not read here.
 *
 * GRN_SNIP_NORMALIZE is added unless the charset state has MY_CS_BINSORT or
 * MY_CS_CSSORT set.
 *
 * message may be NULL; when it is not, it receives a description of the
 * failure (at most MYSQL_ERRMSG_SIZE bytes).
 *
 * Returns FALSE on success with the new snip stored in *snippet. Returns TRUE
 * on failure; any snip opened here has then been closed and *snippet is NULL.
 */
static my_bool mrn_snippet_prepare(st_mrn_snip_info *snip_info, UDF_ARGS *args,
                                   char *message, grn_snip **snippet)
{
  unsigned int i;
  CHARSET_INFO *cs;
  grn_ctx *ctx = &snip_info->ctx;
  long long snip_max_len;
  long long snip_max_num;
  long long skip_leading_spaces;
  long long html_escape;
  int flags = GRN_SNIP_COPY_TAG;
  grn_snip_mapping *mapping = NULL;
  grn_rc rc;
  String *result_str = &snip_info->result_str;

  *snippet = NULL;
  snip_max_len = *((long long *) args->args[1]);
  snip_max_num = *((long long *) args->args[2]);

  if (args->arg_type[3] == STRING_RESULT) {
    if (!(cs = get_charset_by_name(args->args[3], MYF(0)))) {
      if (message) {
        snprintf(message, MYSQL_ERRMSG_SIZE,
                 "Unknown charset: <%s>", args->args[3]);
      }
      goto error;
    }
  } else {
    uint charset_id = static_cast<uint>(*((long long *) args->args[3]));
    if (!(cs = get_charset(charset_id, MYF(0)))) {
      if (message) {
        snprintf(message, MYSQL_ERRMSG_SIZE,
                 "Unknown charset ID: <%u>", charset_id);
      }
      goto error;
    }
  }
  if (!mrn::encoding::set(ctx, cs)) {
    if (message) {
      snprintf(message, MYSQL_ERRMSG_SIZE,
               "Unsupported charset: <%s>", cs->name);
    }
    goto error;
  }

  if (!(cs->state & (MY_CS_BINSORT | MY_CS_CSSORT))) {
    flags |= GRN_SNIP_NORMALIZE;
  }

  skip_leading_spaces = *((long long *) args->args[4]);
  if (skip_leading_spaces) {
    flags |= GRN_SNIP_SKIP_LEADING_SPACES;
  }

  html_escape = *((long long *) args->args[5]);
  if (html_escape) {
    mapping = (grn_snip_mapping *) -1;
  }

  *snippet = grn_snip_open(ctx, flags, static_cast<unsigned int>(snip_max_len),
                           static_cast<unsigned int>(snip_max_num),
                           "", 0, "", 0, mapping);
  if (ctx->rc) {
    if (message) {
      snprintf(message, MYSQL_ERRMSG_SIZE,
               "Failed to open grn_snip: <%s>", ctx->errbuf);
    }
    goto error;
  }

  /*
   * Consume complete (keyword, open tag, close tag) triples only. The bound
   * is on the last index of the triple so that a trailing partial triple
   * never reads past the end of args->args / args->lengths.
   */
  for (i = 8; i + 2 < args->arg_count; i += 3) {
    rc = grn_snip_add_cond(ctx, *snippet,
                           args->args[i], args->lengths[i],
                           args->args[i + 1], args->lengths[i + 1],
                           args->args[i + 2], args->lengths[i + 2]);
    if (rc) {
      if (message) {
        snprintf(message, MYSQL_ERRMSG_SIZE,
                 "Failed to add a condition to grn_snip: <%s>", ctx->errbuf);
      }
      goto error;
    }
  }

  result_str->set_charset(cs);
  return FALSE;

error:
  if (*snippet) {
    grn_snip_close(ctx, *snippet);
    /* Do not leave a dangling pointer in the caller's out-parameter. */
    *snippet = NULL;
  }
  return TRUE;
}
Beispiel #21
0
/* TODO: support caching for the same parameter. */
/*
 * snippet() function: extracts keyword-in-context snippets from a text.
 *
 * args[0] is the text. If the last argument is a GRN_TABLE_HASH_KEY object it
 * is treated as an options table; the recognized keys are "width",
 * "max_n_results", "skip_leading_spaces", "html_escape", "prefix", "suffix",
 * "normalizer", "default_open_tag" and "default_close_tag". Any other key
 * raises GRN_INVALID_ARGUMENT.
 *
 * The arguments between the text and the options are keywords:
 *   - when neither default tag has a non-zero length, they are consumed as
 *     (keyword, open tag, close tag) triples; leftover arguments that do not
 *     form a full triple are not used;
 *   - otherwise every argument is a keyword registered with NULL tags.
 *
 * Returns the object produced by snippet_exec(), or a GRN_DB_VOID object from
 * grn_plugin_proc_alloc() when no snippets object was produced (too few
 * arguments, an error, snip open failure, or snippet_exec() returning NULL).
 */
static grn_obj *
func_snippet(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *snippets = NULL;

#define N_REQUIRED_ARGS 1
#define KEYWORD_SET_SIZE 3
  if (nargs > N_REQUIRED_ARGS) {
    grn_obj *text = args[0];
    grn_obj *end_arg = args[nargs - 1];
    grn_obj *snip = NULL;
    /* Defaults used when the corresponding option is not given. */
    unsigned int width = 200;
    unsigned int max_n_results = 3;
    grn_snip_mapping *mapping = NULL;
    int flags = GRN_SNIP_SKIP_LEADING_SPACES;
    const char *prefix = NULL;
    int prefix_length = 0;
    const char *suffix = NULL;
    int suffix_length = 0;
    const char *normalizer_name = NULL;
    int normalizer_name_length = 0;
    const char *default_open_tag = NULL;
    int default_open_tag_length = 0;
    const char *default_close_tag = NULL;
    int default_close_tag_length = 0;
    int n_args_without_option = nargs;

    if (end_arg->header.type == GRN_TABLE_HASH_KEY) {
      grn_obj *options = end_arg;
      grn_hash_cursor *cursor;
      void *key;
      int key_size;
      grn_obj *value;

      /* The options table is not a keyword argument. */
      n_args_without_option--;
      cursor = grn_hash_cursor_open(ctx, (grn_hash *)options,
                                    NULL, 0, NULL, 0,
                                    0, -1, 0);
      if (!cursor) {
        GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                         "snippet(): couldn't open cursor");
        goto exit;
      }
      while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) {
        grn_hash_cursor_get_key_value(ctx, cursor,
                                      &key, &key_size,
                                      (void **)&value);
        /* Keys are compared by exact length and bytes. */
        if (key_size == 5 && !memcmp(key, "width", 5)) {
          width = GRN_UINT32_VALUE(value);
        } else if (key_size == 13 && !memcmp(key, "max_n_results", 13)) {
          max_n_results = GRN_UINT32_VALUE(value);
        } else if (key_size == 19 && !memcmp(key, "skip_leading_spaces", 19)) {
          /* The flag is on by default; only an explicit false clears it. */
          if (GRN_BOOL_VALUE(value) == GRN_FALSE) {
            flags &= ~GRN_SNIP_SKIP_LEADING_SPACES;
          }
        } else if (key_size == 11 && !memcmp(key, "html_escape", 11)) {
          if (GRN_BOOL_VALUE(value)) {
            mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;
          }
        } else if (key_size == 6 && !memcmp(key, "prefix", 6)) {
          prefix = GRN_TEXT_VALUE(value);
          prefix_length = GRN_TEXT_LEN(value);
        } else if (key_size == 6 && !memcmp(key, "suffix", 6)) {
          suffix = GRN_TEXT_VALUE(value);
          suffix_length = GRN_TEXT_LEN(value);
        } else if (key_size == 10 && !memcmp(key, "normalizer", 10)) {
          normalizer_name = GRN_TEXT_VALUE(value);
          normalizer_name_length = GRN_TEXT_LEN(value);
        } else if (key_size == 16 && !memcmp(key, "default_open_tag", 16)) {
          default_open_tag = GRN_TEXT_VALUE(value);
          default_open_tag_length = GRN_TEXT_LEN(value);
        } else if (key_size == 17 && !memcmp(key, "default_close_tag", 17)) {
          default_close_tag = GRN_TEXT_VALUE(value);
          default_close_tag_length = GRN_TEXT_LEN(value);
        } else {
          GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                           "invalid option name: <%.*s>",
                           key_size, (char *)key);
          /* Close the cursor before leaving through the error path. */
          grn_hash_cursor_close(ctx, cursor);
          goto exit;
        }
      }
      grn_hash_cursor_close(ctx, cursor);
    }

    snip = grn_snip_open(ctx, flags, width, max_n_results,
                         default_open_tag, default_open_tag_length,
                         default_close_tag, default_close_tag_length, mapping);
    if (snip) {
      /* NOTE(review): rc is assigned below but never read. */
      grn_rc rc;
      unsigned int i;
      /*
       * Normalizer selection:
       *   - option absent: GRN_NORMALIZER_AUTO;
       *   - non-empty name: looked up and validated, then set;
       *   - empty name: grn_snip_set_normalizer() is not called at all.
       */
      if (!normalizer_name) {
        grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO);
      } else if (normalizer_name_length > 0) {
        grn_obj *normalizer;
        normalizer = grn_ctx_get(ctx, normalizer_name, normalizer_name_length);
        if (!grn_obj_is_normalizer_proc(ctx, normalizer)) {
          grn_obj inspected;
          GRN_TEXT_INIT(&inspected, 0);
          grn_inspect(ctx, &inspected, normalizer);
          GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                           "snippet(): not normalizer: <%.*s>",
                           (int)GRN_TEXT_LEN(&inspected),
                           GRN_TEXT_VALUE(&inspected));
          GRN_OBJ_FIN(ctx, &inspected);
          grn_obj_unlink(ctx, normalizer);
          /* NOTE(review): snip is not closed on this path -- confirm who
           * owns it. */
          goto exit;
        }
        grn_snip_set_normalizer(ctx, snip, normalizer);
        grn_obj_unlink(ctx, normalizer);
      }
      if (default_open_tag_length == 0 && default_close_tag_length == 0) {
        /* No default tags: arguments come as (keyword, open, close) triples.
         * Integer division drops an incomplete trailing triple. */
        unsigned int n_keyword_sets =
          (n_args_without_option - N_REQUIRED_ARGS) / KEYWORD_SET_SIZE;
        grn_obj **keyword_set_args = args + N_REQUIRED_ARGS;
        for (i = 0; i < n_keyword_sets; i++) {
          rc = grn_snip_add_cond(ctx, snip,
                                 GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE]),
                                 GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE]),
                                 GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE + 1]),
                                 GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE + 1]),
                                 GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE + 2]),
                                 GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE + 2]));
        }
      } else {
        /* A default tag was given: every argument is a keyword and NULL tags
         * are passed for each condition. */
        unsigned int n_keywords = n_args_without_option - N_REQUIRED_ARGS;
        grn_obj **keyword_args = args + N_REQUIRED_ARGS;
        for (i = 0; i < n_keywords; i++) {
          rc = grn_snip_add_cond(ctx, snip,
                                 GRN_TEXT_VALUE(keyword_args[i]),
                                 GRN_TEXT_LEN(keyword_args[i]),
                                 NULL, 0,
                                 NULL, 0);
        }
      }
      snippets = snippet_exec(ctx, snip, text, user_data,
                              prefix, prefix_length,
                              suffix, suffix_length);
    }
  }
#undef KEYWORD_SET_SIZE
#undef N_REQUIRED_ARGS

exit :
  /* Always hand back an object: fall back to a GRN_DB_VOID value. */
  if (!snippets) {
    snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return snippets;
}
Beispiel #22
0
/**
 * Open a grn_snip object for mroonga_snippet_html() and register the
 * keyword conditions on it.
 *
 * The snip is opened with a fixed width (200), at most 3 results,
 * <span class="keyword">...</span> as the open/close tags and HTML escape
 * mapping. GRN_SNIP_NORMALIZE is added to the flags unless the system
 * charset has MY_CS_BINSORT or MY_CS_CSSORT set in its state.
 *
 * Conditions come from one of two places:
 *  - query mode (info->query_mode.used): args->args[1] is parsed as a query
 *    against a lazily created hash table keyed by ShortText, and the
 *    resulting expression is fed to grn_expr_snip_add_conditions();
 *  - keyword mode: every non-NULL argument from index 1 onwards is added
 *    as a keyword via grn_snip_add_cond().
 *
 * @param info     UDF state; supplies the grn_ctx and caches the query-mode
 *                 table and default column.
 * @param args     UDF arguments.
 * @param message  Error message buffer of MYSQL_ERRMSG_SIZE bytes, or NULL
 *                 when no message should be written.
 * @param snippet  Out-parameter. Receives the opened snip on success and is
 *                 NULL on failure.
 * @return false on success, true on error.
 */
static mrn_bool mrn_snippet_html_prepare(mrn_snippet_html_info *info,
                                        UDF_ARGS *args,
                                        char *message,
                                        grn_obj **snippet)
{
  MRN_DBUG_ENTER_FUNCTION();

  grn_ctx *ctx = info->ctx;
  int flags = GRN_SNIP_SKIP_LEADING_SPACES;
  unsigned int width = 200;
  unsigned int max_n_results = 3;
  const char *open_tag = "<span class=\"keyword\">";
  const char *close_tag = "</span>";
  grn_snip_mapping *mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;
  grn_obj *expr = NULL;

  *snippet = NULL;

  mrn::encoding::set_raw(ctx, system_charset_info);
  if (!(system_charset_info->state & (MY_CS_BINSORT | MY_CS_CSSORT))) {
    flags |= GRN_SNIP_NORMALIZE;
  }

  *snippet = grn_snip_open(ctx, flags,
                           width, max_n_results,
                           open_tag, strlen(open_tag),
                           close_tag, strlen(close_tag),
                           mapping);
  /* Treat a NULL snip as a failure too so that the code below never hands
     a NULL object to the grn_snip_* functions. */
  if (!*snippet || ctx->rc != GRN_SUCCESS) {
    if (message) {
      snprintf(message, MYSQL_ERRMSG_SIZE,
               "mroonga_snippet_html(): failed to open grn_snip: <%s>",
               ctx->errbuf);
    }
    goto error;
  }

  if (info->query_mode.used) {
    /* The table and its key column are created once and cached in info so
       that later calls can reuse them. */
    if (!info->query_mode.table) {
      grn_obj *short_text;
      short_text = grn_ctx_at(info->ctx, GRN_DB_SHORT_TEXT);
      info->query_mode.table = grn_table_create(info->ctx,
                                                NULL, 0, NULL,
                                                GRN_TABLE_HASH_KEY,
                                                short_text,
                                                NULL);
    }
    if (!info->query_mode.default_column) {
      info->query_mode.default_column =
        grn_obj_column(info->ctx,
                       info->query_mode.table,
                       GRN_COLUMN_NAME_KEY,
                       GRN_COLUMN_NAME_KEY_LEN);
    }

    grn_obj *record = NULL;
    GRN_EXPR_CREATE_FOR_QUERY(info->ctx, info->query_mode.table, expr, record);
    if (!expr) {
      if (message) {
        snprintf(message, MYSQL_ERRMSG_SIZE,
                 "mroonga_snippet_html(): "
                 "failed to create expression: <%s>",
                 ctx->errbuf);
      }
      goto error;
    }

    mrn::QueryParser query_parser(info->ctx,
                                  current_thd,
                                  expr,
                                  info->query_mode.default_column,
                                  0,
                                  NULL);
    grn_rc rc = query_parser.parse(args->args[1], args->lengths[1]);
    if (rc != GRN_SUCCESS) {
      if (message) {
        snprintf(message, MYSQL_ERRMSG_SIZE,
                 "mroonga_snippet_html(): "
                 "failed to parse query: <%s>",
                 ctx->errbuf);
      }
      goto error;
    }

    rc = grn_expr_snip_add_conditions(info->ctx,
                                      expr,
                                      *snippet,
                                      0,
                                      NULL, NULL,
                                      NULL, NULL);
    if (rc != GRN_SUCCESS) {
      if (message) {
        snprintf(message, MYSQL_ERRMSG_SIZE,
                 "mroonga_snippet_html(): "
                 "failed to add conditions: <%s>",
                 ctx->errbuf);
      }
      goto error;
    }
    /* NOTE(review): expr is not closed on this success path. Whether the
       snip keeps references into expr is not visible from here -- confirm
       before adding a close. */
  } else {
    unsigned int i;
    /* Argument 0 is skipped; keywords start at index 1. */
    for (i = 1; i < args->arg_count; ++i) {
      if (!args->args[i]) {
        /* NULL arguments are ignored. */
        continue;
      }
      grn_rc rc = grn_snip_add_cond(ctx, *snippet,
                                    args->args[i], args->lengths[i],
                                    NULL, 0,
                                    NULL, 0);
      if (rc != GRN_SUCCESS) {
        if (message) {
          snprintf(message, MYSQL_ERRMSG_SIZE,
                   "mroonga_snippet_html(): "
                   "failed to add a condition to grn_snip: <%s>",
                   ctx->errbuf);
        }
        goto error;
      }
    }
  }

  DBUG_RETURN(false);

error:
  if (expr) {
    grn_obj_close(ctx, expr);
  }
  if (*snippet) {
    grn_obj_close(ctx, *snippet);
    /* Do not leave a dangling handle in the caller's out-parameter; a
       caller that cleans up a non-NULL snippet would close it twice. */
    *snippet = NULL;
  }
  DBUG_RETURN(true);
}