Beispiel #1
0
static grn_obj *
proc_table_create(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
    uint32_t nvars;
    grn_obj *buf = args[0];
    grn_expr_var *vars;
    grn_proc_get_info(ctx, user_data, &vars, &nvars, NULL);
    if (nvars == 6) {
        grn_obj *table;
        grn_obj_flags flags = grn_atoi(GRN_TEXT_VALUE(&vars[1].value),
                                       GRN_BULK_CURR(&vars[1].value), NULL);
        if (GRN_TEXT_LEN(&vars[0].value)) {
            flags |= GRN_OBJ_PERSISTENT;
        }
        table = grn_table_create(ctx,
                                 GRN_TEXT_VALUE(&vars[0].value),
                                 GRN_TEXT_LEN(&vars[0].value),
                                 NULL, flags,
                                 grn_ctx_get(ctx, GRN_TEXT_VALUE(&vars[2].value),
                                             GRN_TEXT_LEN(&vars[2].value)),
                                 grn_ctx_get(ctx, GRN_TEXT_VALUE(&vars[3].value),
                                             GRN_TEXT_LEN(&vars[3].value)));
        if (table) {
            grn_obj_set_info(ctx, table,
                             GRN_INFO_DEFAULT_TOKENIZER,
                             grn_ctx_get(ctx, GRN_TEXT_VALUE(&vars[4].value),
                                         GRN_TEXT_LEN(&vars[4].value)));
            grn_obj_unlink(ctx, table);
        }
        GRN_TEXT_PUTS(ctx, buf, ctx->rc ? "false" : "true");
    }
    return buf;
}
Beispiel #2
0
grn_index *
grn_index_open(grn_ctx *ctx, const char *path)
{
  grn_obj *db, *keys, *key_type, *lexicon, *inv, *tokenizer;
  if ((db = grn_db_create(ctx, NULL, NULL))) {
    char buffer[PATH_MAX];
    strcpy(buffer, path);
    strcat(buffer, ".SEN");
    if ((key_type = grn_ctx_at(ctx, GRN_DB_SHORTTEXT))) {
      if ((keys = grn_table_open(ctx, "<keys>", 6, buffer))) {
        strcpy(buffer, path);
        strcat(buffer, ".SEN.l");
        if ((lexicon = grn_table_open(ctx, "<lexicon>", 9, buffer))) {
          if ((tokenizer = grn_ctx_at(ctx, GRN_DB_MECAB))) {
            grn_obj_set_info(ctx, lexicon, GRN_INFO_DEFAULT_TOKENIZER, tokenizer);
            strcpy(buffer, path);
            strcat(buffer, ".SEN.i");
            if ((inv = grn_column_open(ctx, lexicon, "inv", 3, buffer, keys))) {
              grn_index *index;
              if ((index = malloc(sizeof(grn_index)))) {
                index->db = db;
                index->keys = keys;
                index->lexicon = lexicon;
                index->inv = inv;
                return index;
              }
            }
          }
        }
      }
    }
  }
  return NULL;
}
Beispiel #3
0
static void
create_terms_table(void)
{
  terms = grn_table_create(&context, "terms", 5, NULL,
                           GRN_OBJ_TABLE_PAT_KEY|GRN_OBJ_PERSISTENT,
                           grn_ctx_at(&context, GRN_DB_SHORT_TEXT), NULL);
  cut_assert_not_null(terms);
  grn_test_assert(grn_obj_set_info(&context, terms, GRN_INFO_DEFAULT_TOKENIZER,
				   grn_ctx_at(&context, GRN_DB_BIGRAM)));

  index_body = grn_column_create(&context, terms, "docs_body", 4, NULL,
                                 GRN_OBJ_COLUMN_INDEX|GRN_OBJ_PERSISTENT|GRN_OBJ_WITH_POSITION,
                                 docs);
  cut_assert_not_null(index_body);

  GRN_UINT32_SET(&context, &int_buf, grn_obj_id(&context, body));
  grn_obj_set_info(&context, index_body, GRN_INFO_SOURCE, &int_buf);
}
Beispiel #4
0
static void
prepare_data(grn_obj *textbuf, grn_obj *intbuf)
{
  docs = grn_table_create(&context, "docs", 4, NULL,
                          GRN_OBJ_TABLE_NO_KEY|GRN_OBJ_PERSISTENT, NULL, 0);
  cut_assert_not_null(docs);

  terms = grn_table_create(&context, "terms", 5, NULL,
                           GRN_OBJ_TABLE_PAT_KEY|GRN_OBJ_PERSISTENT,
                           grn_ctx_at(&context, GRN_DB_SHORTTEXT), 0);
  cut_assert_not_null(terms);
  grn_test_assert(grn_obj_set_info(&context, terms, GRN_INFO_DEFAULT_TOKENIZER,
				   grn_ctx_at(&context, GRN_DB_BIGRAM)));

  size = grn_column_create(&context, docs, "size", 4, NULL,
                           GRN_OBJ_COLUMN_SCALAR|GRN_OBJ_PERSISTENT,
                           grn_ctx_at(&context, GRN_DB_UINT32));
  cut_assert_not_null(size);

  body = grn_column_create(&context, docs, "body", 4, NULL,
                           GRN_OBJ_COLUMN_SCALAR|GRN_OBJ_PERSISTENT,
                           grn_ctx_at(&context, GRN_DB_TEXT));
  cut_assert_not_null(body);

  index_body = grn_column_create(&context, terms, "docs_body", 4, NULL,
                                 GRN_OBJ_COLUMN_INDEX|GRN_OBJ_PERSISTENT|GRN_OBJ_WITH_POSITION,
                                 docs);
  cut_assert_not_null(index_body);

  GRN_UINT32_SET(&context, intbuf, grn_obj_id(&context, body));
  grn_obj_set_info(&context, index_body, GRN_INFO_SOURCE, intbuf);

  INSERT_DATA("hoge");
  INSERT_DATA("fuga fuga");
  INSERT_DATA("moge moge moge");
  INSERT_DATA("hoge hoge");
  INSERT_DATA("hoge fuga fuga");
  INSERT_DATA("hoge moge moge moge");
  INSERT_DATA("moge hoge hoge");
  INSERT_DATA("moge hoge fuga fuga");
  INSERT_DATA("moge hoge moge moge moge");
  INSERT_DATA("poyo moge hoge moge moge moge");
}
Beispiel #5
0
void
cut_setup(void)
{
    gchar *table_path, *vgram_path;
    const gchar *type_name, *table_name;

    cut_set_fixture_data_dir(grn_test_get_base_dir(),
                             "fixtures",
                             "inverted-index",
                             NULL);

    logger = setup_grn_logger();

    expected_messages = NULL;
    record_ids = NULL;

    remove_tmp_directory();
    g_mkdir_with_parents(tmp_directory, 0700);
    path = g_build_filename(tmp_directory, "inverted-index", NULL);

    context = g_new0(grn_ctx, 1);
    grn_test_assert(grn_ctx_init(context, GRN_CTX_USE_QL));
    GRN_CTX_SET_ENCODING(context, GRN_ENC_UTF8);

    db = grn_db_create(context, NULL, NULL);
    grn_ctx_use(context, db);

    type_name = "name";
    type = grn_type_create(context, type_name, strlen(type_name),
                           GRN_OBJ_KEY_VAR_SIZE, TYPE_SIZE);

    table_name = "lexicon";
    table_path = g_build_filename(tmp_directory, "lexicon-table", NULL);
    lexicon = grn_table_create(context,
                               table_name, strlen(table_name),
                               table_path,
                               GRN_OBJ_PERSISTENT | GRN_OBJ_TABLE_PAT_KEY,
                               type, NULL);

    grn_obj_set_info(context, lexicon, GRN_INFO_DEFAULT_TOKENIZER,
                     grn_ctx_at(context, GRN_DB_BIGRAM));

    g_free(table_path);

    vgram_path = g_build_filename(tmp_directory, "vgram", NULL);
    /*
      vgram = grn_vgram_create(vgram_path);
    */
    g_free(vgram_path);

    inverted_index = NULL;
}
Beispiel #6
0
static grn_obj *
proc_column_create(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
    uint32_t nvars;
    grn_obj *buf = args[0];
    grn_expr_var *vars;
    grn_proc_get_info(ctx, user_data, &vars, &nvars, NULL);
    if (nvars == 6) {
        grn_obj_flags flags = grn_atoi(GRN_TEXT_VALUE(&vars[2].value),
                                       GRN_BULK_CURR(&vars[2].value), NULL);
        grn_obj *column, *table = grn_ctx_get(ctx, GRN_TEXT_VALUE(&vars[0].value),
                                              GRN_TEXT_LEN(&vars[0].value));
        grn_obj *type = grn_ctx_get(ctx, GRN_TEXT_VALUE(&vars[3].value),
                                    GRN_TEXT_LEN(&vars[3].value));
        if (GRN_TEXT_LEN(&vars[1].value)) {
            flags |= GRN_OBJ_PERSISTENT;
        }
        column = grn_column_create(ctx, table,
                                   GRN_TEXT_VALUE(&vars[1].value),
                                   GRN_TEXT_LEN(&vars[1].value),
                                   NULL, flags, type);
        if (column) {
            if (GRN_TEXT_LEN(&vars[4].value)) {
                grn_obj sources, source_ids, **p, **pe;
                GRN_PTR_INIT(&sources, GRN_OBJ_VECTOR, GRN_ID_NIL);
                GRN_UINT32_INIT(&source_ids, GRN_OBJ_VECTOR);
                grn_obj_columns(ctx, type,
                                GRN_TEXT_VALUE(&vars[4].value),
                                GRN_TEXT_LEN(&vars[4].value),
                                &sources);
                p = (grn_obj **)GRN_BULK_HEAD(&sources);
                pe = (grn_obj **)GRN_BULK_CURR(&sources);
                while (p < pe) {
                    grn_id source_id = grn_obj_id(ctx, *p++);
                    if (source_id) {
                        GRN_UINT32_PUT(ctx, &source_ids, source_id);
                    }
                }
                if (GRN_BULK_VSIZE(&source_ids)) {
                    grn_obj_set_info(ctx, column, GRN_INFO_SOURCE, &source_ids);
                }
                GRN_OBJ_FIN(ctx, &source_ids);
                GRN_OBJ_FIN(ctx, &sources);
            }
            grn_obj_unlink(ctx, column);
        }
        GRN_TEXT_PUTS(ctx, buf, ctx->rc ? "false" : "true");
    }
    return buf;
}
Beispiel #7
0
static grn_obj *
func_highlight_html_create_keywords_table(grn_ctx *ctx, grn_obj *expression)
{
  grn_obj *keywords;
  grn_obj *condition_ptr = NULL;
  grn_obj *condition = NULL;

  keywords = grn_table_create(ctx, NULL, 0, NULL,
                              GRN_OBJ_TABLE_PAT_KEY,
                              grn_ctx_at(ctx, GRN_DB_SHORT_TEXT),
                              NULL);

  {
    grn_obj *normalizer;
    normalizer = grn_ctx_get(ctx, "NormalizerAuto", -1);
    grn_obj_set_info(ctx, keywords, GRN_INFO_NORMALIZER, normalizer);
    grn_obj_unlink(ctx, normalizer);
  }

  condition_ptr = grn_expr_get_var(ctx, expression,
                                   GRN_SELECT_INTERNAL_VAR_CONDITION,
                                   strlen(GRN_SELECT_INTERNAL_VAR_CONDITION));
  if (condition_ptr) {
    condition = GRN_PTR_VALUE(condition_ptr);
  }

  if (condition) {
    size_t i, n_keywords;
    grn_obj current_keywords;
    GRN_PTR_INIT(&current_keywords, GRN_OBJ_VECTOR, GRN_ID_NIL);
    grn_expr_get_keywords(ctx, condition, &current_keywords);

    n_keywords = GRN_BULK_VSIZE(&current_keywords) / sizeof(grn_obj *);
    for (i = 0; i < n_keywords; i++) {
      grn_obj *keyword;
      keyword = GRN_PTR_VALUE_AT(&current_keywords, i);
      grn_table_add(ctx, keywords,
                    GRN_TEXT_VALUE(keyword),
                    GRN_TEXT_LEN(keyword),
                    NULL);
    }
    grn_obj_unlink(ctx, &current_keywords);
  }

  return keywords;
}
Beispiel #8
0
static grn_rc
set_index_source(grn_obj *index, grn_obj *source)
{
    grn_rc rc;
    grn_obj buf;
    grn_id id = grn_obj_id(context, source);
    GRN_TEXT_INIT(&buf, 0);
    grn_bulk_write(context, &buf, (void *)&id, sizeof(grn_id));
    cut_assert_equal_int(grn_obj_get_nhooks(context, source, GRN_HOOK_SET), 0);
    rc = grn_obj_set_info(context, index, GRN_INFO_SOURCE, &buf);
    cut_assert_equal_int(grn_obj_get_nhooks(context, source, GRN_HOOK_SET), 1);
    GRN_BULK_REWIND(&buf);
    cut_assert_null(grn_obj_get_hook(context, source, GRN_HOOK_SET, 0, &buf));
    cut_assert_equal_int(*((grn_id *)GRN_BULK_HEAD(&buf)), grn_obj_id(context, index));
    grn_obj_close(context, &buf);
    return rc;
}
Beispiel #9
0
void
test_temporary_table_default_tokenizer(gpointer data)
{
  grn_obj *table;
  grn_obj_flags flags = GPOINTER_TO_INT(data);
  grn_obj *tokenizer = NULL;
  char name[1024];
  int name_size;

  table = grn_table_create(context, NULL, 0, NULL,
                           flags,
                           NULL, NULL);
  grn_obj_set_info(context, table, GRN_INFO_DEFAULT_TOKENIZER,
                   get_object("TokenTrigram"));
  tokenizer = grn_obj_get_info(context, table, GRN_INFO_DEFAULT_TOKENIZER, NULL);
  name_size = grn_obj_name(context, tokenizer, name, sizeof(name));
  name[name_size] = '\0';
  cut_assert_equal_string("TokenTrigram", name);
}
/*
 * Specifies the normalizer used by {Groonga::IndexColumn}.
 *
 * @example
 *   # Uses NFKC normalizer.
 *   table.normalizer = "NormalizerNFKC51"
 *   # Specifies normalizer object.
 *   table.normalizer = Groonga::Context["NormalizerNFKC51"]
 *   # Uses auto normalizer that is a normalizer for backward compatibility.
 *   table.normalizer = "TNormalizerAuto"
 *
 * @overload normalizer=(name)
 *    @param [String] name Set a nomalizer named @name@.
 *
 * @overload normalizer=(normalizer)
 *    @param [Groonga::Procedure] normalizer Set the normalizer object.
 *
 * @overload normalizer=(normalizer)
 *    @param [nil] normalizer Unset normalizer.
 */
static VALUE
rb_grn_table_key_support_set_normalizer (VALUE self, VALUE rb_normalizer)
{
    grn_ctx *context;
    grn_obj *table;
    grn_obj *normalizer;
    grn_rc rc;

    rb_grn_table_key_support_deconstruct(SELF(self), &table, &context,
                                         NULL, NULL, NULL,
                                         NULL, NULL, NULL,
                                         NULL);

    normalizer = RVAL2GRNOBJECT(rb_normalizer, &context);
    rc = grn_obj_set_info(context, table, GRN_INFO_NORMALIZER, normalizer);
    rb_grn_context_check(context, self);
    rb_grn_rc_check(rc, self);

    return Qnil;
}
Beispiel #11
0
/*
 * インデックス対象となる複数のカラムを配列で設定する。
 *
 * @overload sources=(columns)
 *   @param [::Array<Groonga::Column>] columns インデックス対象となるカラムの配列
 */
static VALUE
rb_grn_index_column_set_sources (VALUE self, VALUE rb_sources)
{
    VALUE exception;
    grn_ctx *context = NULL;
    grn_obj *column;
    int i, n;
    VALUE *rb_source_values;
    grn_id range_id;
    grn_id *sources;
    grn_rc rc;

    rb_grn_index_column_deconstruct(SELF(self), &column, &context,
                                    NULL, NULL,
                                    NULL, NULL,
                                    &range_id, NULL,
                                    NULL, NULL);

    n = RARRAY_LEN(rb_sources);
    rb_source_values = RARRAY_PTR(rb_sources);
    sources = ALLOCA_N(grn_id, n);
    for (i = 0; i < n; i++) {
        sources[i] = resolve_source_id(context, column, range_id,
                                       rb_source_values[i]);
    }

    {
        grn_obj bulk_sources;
        GRN_OBJ_INIT(&bulk_sources, GRN_BULK, 0, GRN_ID_NIL);
        GRN_TEXT_SET(context, &bulk_sources, sources, n * sizeof(grn_id));
        rc = grn_obj_set_info(context, column, GRN_INFO_SOURCE, &bulk_sources);
        exception = rb_grn_context_to_exception(context, self);
        grn_obj_unlink(context, &bulk_sources);
    }

    if (!NIL_P(exception))
        rb_exc_raise(exception);
    rb_grn_rc_check(rc, self);

    return Qnil;
}
/*
 * {Groonga::IndexColumn} で使用するトークナイザを設定する。
 *
 * @example
 *   # 2-gramを使用。
 *   table.default_tokenizer = "TokenBigram"
 *   # オブジェクトで指定
 *   table.default_tokenizer = Groonga::Context.default["TokenBigram"]
 *   # オブジェクトIDで指定
 *   table.default_tokenizer = Groonga::Type::BIGRAM
 *   # N-gram用のトークナイザを使うときはGroonga::IndexColumn
 *   # には自動的に:with_section => trueが指定される。
 *   index = table.define_index_column("blog_content", "Blogs",
 *                                     :source => "content")
 *   p index # -> #<Groonga::IndexColumn ... flags: <WITH_POSITION|...>>
 *
 *   # MeCabを使用
 *   table.default_tokenizer = "TokenMecab"
 *
 * @overload default_tokenizer=(tokenizer)
 */
static VALUE
rb_grn_table_key_support_set_default_tokenizer (VALUE self, VALUE rb_tokenizer)
{
    grn_ctx *context;
    grn_obj *table;
    grn_obj *tokenizer;
    grn_rc rc;

    rb_grn_table_key_support_deconstruct(SELF(self), &table, &context,
                                         NULL, NULL, NULL,
                                         NULL, NULL, NULL,
                                         NULL);

    tokenizer = RVAL2GRNOBJECT(rb_tokenizer, &context);
    rc = grn_obj_set_info(context, table,
                          GRN_INFO_DEFAULT_TOKENIZER, tokenizer);
    rb_grn_context_check(context, self);
    rb_grn_rc_check(rc, self);

    return Qnil;
}
Beispiel #13
0
 grn_obj *FieldNormalizer::normalize(const char *string,
                                     unsigned int string_length) {
   MRN_DBUG_ENTER_METHOD();
   grn_obj normalizer;
   GRN_TEXT_INIT(&normalizer, 0);
   find_grn_normalizer(&normalizer);
   int flags = 0;
   grn_encoding original_encoding = GRN_CTX_GET_ENCODING(ctx_);
   encoding::set_raw(ctx_, field_->charset());
   grn_obj *grn_string;
   if (GRN_TEXT_VALUE(&normalizer)[GRN_TEXT_LEN(&normalizer) - 1] == ')') {
     if (!lexicon_) {
       lexicon_ = grn_table_create(ctx_,
                                   NULL, 0,
                                   NULL,
                                   GRN_OBJ_TABLE_PAT_KEY,
                                   grn_ctx_at(ctx_, GRN_DB_SHORT_TEXT),
                                   NULL);
     }
     grn_obj_set_info(ctx_, lexicon_, GRN_INFO_NORMALIZER, &normalizer);
     grn_string = grn_string_open(ctx_,
                                  string,
                                  string_length,
                                  lexicon_,
                                  flags);
   } else {
     grn_string = grn_string_open(ctx_,
                                  string,
                                  string_length,
                                  grn_ctx_get(ctx_,
                                              GRN_TEXT_VALUE(&normalizer),
                                              GRN_TEXT_LEN(&normalizer)),
                                  flags);
   }
   GRN_OBJ_FIN(ctx_, &normalizer);
   GRN_CTX_SET_ENCODING(ctx_, original_encoding);
   DBUG_RETURN(grn_string);
 }
Beispiel #14
0
static grn_obj *
func_highlight_create_keywords_table(grn_ctx *ctx,
                                     grn_user_data *user_data,
                                     const char *normalizer_name,
                                     unsigned int normalizer_name_length)
{
  grn_obj *keywords;

  keywords = grn_table_create(ctx, NULL, 0, NULL,
                              GRN_OBJ_TABLE_PAT_KEY,
                              grn_ctx_at(ctx, GRN_DB_SHORT_TEXT),
                              NULL);

  if (normalizer_name_length > 0) {
    grn_obj *normalizer;
    normalizer = grn_ctx_get(ctx,
                             normalizer_name,
                             normalizer_name_length);
    if (!grn_obj_is_normalizer_proc(ctx, normalizer)) {
      grn_obj inspected;
      GRN_TEXT_INIT(&inspected, 0);
      grn_inspect(ctx, &inspected, normalizer);
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "highlight_full() not normalizer: <%.*s>",
                       (int)GRN_TEXT_LEN(&inspected),
                       GRN_TEXT_VALUE(&inspected));
      GRN_OBJ_FIN(ctx, &inspected);
      grn_obj_unlink(ctx, normalizer);
      grn_obj_unlink(ctx, keywords);
      return NULL;
    }
    grn_obj_set_info(ctx, keywords, GRN_INFO_NORMALIZER, normalizer);
    grn_obj_unlink(ctx, normalizer);
  }

  return keywords;
}
/*
 * Sets token filters that used in {Groonga::IndexColumn}.
 *
 * @example
 *   # Use "TokenFilterStem" and "TokenfilterStopWord"
 *   table.token_filters = ["TokenFilterStem", "TokenFilterStopWord"]
 *
 * @overload token_filters=(token_filters)
 *   @param token_filters [::Array<String>] Token filter names.
 */
static VALUE
rb_grn_table_key_support_set_token_filters (VALUE self,
                                            VALUE rb_token_filters)
{
    grn_ctx *context;
    grn_obj *table;
    grn_obj token_filters;
    grn_rc rc;

    rb_grn_table_key_support_deconstruct(SELF(self), &table, &context,
                                         NULL, NULL, NULL,
                                         NULL, NULL, NULL,
                                         NULL);

    GRN_PTR_INIT(&token_filters, GRN_OBJ_VECTOR, GRN_ID_NIL);
    RVAL2GRNPVECTOR(rb_token_filters, context, &token_filters);
    rc = grn_obj_set_info(context, table,
                          GRN_INFO_TOKEN_FILTERS, &token_filters);
    grn_obj_unlink(context, &token_filters);
    rb_grn_context_check(context, self);
    rb_grn_rc_check(rc, self);

    return Qnil;
}
Beispiel #16
0
void
test_expr_query(void)
{
  grn_obj *t1, *c1, *lc, *ft, *v, *expr;
  grn_obj textbuf, intbuf;
  grn_id r1, r2, r3, r4;

  /* actual table */
  t1 = grn_table_create(&context, "t1", 2, NULL,
			GRN_OBJ_TABLE_NO_KEY|GRN_OBJ_PERSISTENT, NULL, 0);
  cut_assert_not_null(t1);

  /* lexicon table */
  lc = grn_table_create(&context, "lc", 2, NULL,
			GRN_OBJ_TABLE_PAT_KEY|GRN_OBJ_PERSISTENT,
                        grn_ctx_at(&context, GRN_DB_SHORTTEXT), 0);
  cut_assert_not_null(lc);
  grn_test_assert(grn_obj_set_info(&context, lc, GRN_INFO_DEFAULT_TOKENIZER,
				   grn_ctx_at(&context, GRN_DB_BIGRAM)));

  /* actual column */
  c1 = grn_column_create(&context, t1, "c1", 2, NULL,
			 GRN_OBJ_COLUMN_SCALAR|GRN_OBJ_PERSISTENT,
			 grn_ctx_at(&context, GRN_DB_TEXT));
  cut_assert_not_null(c1);

  /* fulltext index */
  ft = grn_column_create(&context, lc, "ft", 2, NULL,
			 GRN_OBJ_COLUMN_INDEX|GRN_OBJ_PERSISTENT|GRN_OBJ_WITH_POSITION, t1);
  cut_assert_not_null(ft);

  GRN_TEXT_INIT(&textbuf, 0);
  GRN_UINT32_INIT(&intbuf, 0);

  /* link between actual column and fulltext index */
  GRN_UINT32_SET(&context, &intbuf, grn_obj_id(&context, c1));
  grn_obj_set_info(&context, ft, GRN_INFO_SOURCE, &intbuf); /* need to use grn_id */

  /* insert row */
  r1 = grn_table_add(&context, t1, NULL, 0, NULL);
  cut_assert_equal_int(1, r1);
  GRN_TEXT_SETS(&context, &textbuf, "abhij");
  grn_test_assert(grn_obj_set_value(&context, c1, r1, &textbuf, GRN_OBJ_SET));

  r2 = grn_table_add(&context, t1, NULL, 0, NULL);
  cut_assert_equal_int(2, r2);
  GRN_TEXT_SETS(&context, &textbuf, "fghij");
  grn_test_assert(grn_obj_set_value(&context, c1, r2, &textbuf, GRN_OBJ_SET));

  r3 = grn_table_add(&context, t1, NULL, 0, NULL);
  cut_assert_equal_int(3, r3);
  GRN_TEXT_SETS(&context, &textbuf, "11 22 33");
  grn_test_assert(grn_obj_set_value(&context, c1, r3, &textbuf, GRN_OBJ_SET));

  r4 = grn_table_add(&context, t1, NULL, 0, NULL);
  cut_assert_equal_int(4, r4);
  GRN_TEXT_SETS(&context, &textbuf, "44 22 55");
  grn_test_assert(grn_obj_set_value(&context, c1, r4, &textbuf, GRN_OBJ_SET));

  /* confirm record are inserted in both column and index */
  cut_assert_equal_int(4, grn_table_size(&context, t1));
  cut_assert_equal_int(17, grn_table_size(&context, lc));

  cut_assert_not_null((expr = grn_expr_create(&context, NULL, 0)));

  v = grn_expr_add_var(&context, expr, NULL, 0);

  GRN_BULK_REWIND(&textbuf);
  grn_expr_append_const(&context, expr, &textbuf);
  GRN_UINT32_SET(&context, &intbuf, GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC);
  grn_expr_append_const(&context, expr, &intbuf);
  grn_expr_append_obj(&context, expr, t1);
  GRN_UINT32_SET(&context, &intbuf, 0);
  grn_expr_append_const(&context, expr, &intbuf);
  grn_expr_append_op(&context, expr, GRN_OP_TABLE_CREATE, 4);

  grn_expr_append_obj(&context, expr, v);
  grn_expr_append_op(&context, expr, GRN_OP_VAR_SET_VALUE, 2);

  grn_expr_append_obj(&context, expr, ft);
  GRN_TEXT_SETS(&context, &textbuf, "hij");
  grn_expr_append_const(&context, expr, &textbuf);
  grn_expr_append_obj(&context, expr, v);
  GRN_UINT32_SET(&context, &intbuf, GRN_SEL_OR);
  grn_expr_append_const(&context, expr, &intbuf);
  grn_expr_append_op(&context, expr, GRN_OP_OBJ_SEARCH, 4);

  grn_expr_append_obj(&context, expr, v);
  GRN_TEXT_SETS(&context, &textbuf, ".c1 .:score");
  grn_expr_append_const(&context, expr, &textbuf);
  GRN_BULK_REWIND(&textbuf);
  grn_expr_append_obj(&context, expr, &textbuf);
  grn_expr_append_op(&context, expr, GRN_OP_JSON_PUT, 3);

  grn_expr_compile(&context, expr);

  grn_expr_exec(&context, expr);

  cut_assert_equal_uint(0, grn_obj_close(&context, expr));

  cut_assert_equal_substring("[[\"abhij\", 1], [\"fghij\", 1]]",
                             GRN_TEXT_VALUE(&textbuf), GRN_TEXT_LEN(&textbuf));

  grn_obj_close(&context, &textbuf);
  grn_obj_close(&context, ft);
  grn_obj_close(&context, c1);
  grn_obj_close(&context, lc);
  grn_obj_close(&context, t1);
}
Beispiel #17
0
void
test_mroonga_index_score(void)
{
    grn_obj *t1,*c1,*lc,*ft;
    grn_obj buff;
    grn_id r1,r2,r3,r4;

    remove_tmp_directory();
    g_mkdir_with_parents(tmp_directory,0700);
    g_chdir(tmp_directory);
    g_mkdir_with_parents("mrn",0700);

    db = grn_db_create(context,"mroonga.grn",NULL);
    cut_assert_not_null(db);

    /* actual table */
    t1 = grn_table_create(context,"t1",2,"mrn/t1.grn",
                          GRN_OBJ_TABLE_NO_KEY|GRN_OBJ_PERSISTENT,NULL,0);
    cut_assert_not_null(t1);

    /* lexicon table */
    lc = grn_table_create(context,"lc",2,"mrn/lc.grn",
                          GRN_OBJ_TABLE_PAT_KEY|GRN_OBJ_PERSISTENT,
                          grn_ctx_at(context, GRN_DB_SHORT_TEXT), 0);
    cut_assert_not_null(lc);
    grn_test_assert(grn_obj_set_info(context, lc, GRN_INFO_DEFAULT_TOKENIZER,
                                     grn_ctx_at(context, GRN_DB_BIGRAM)));

    /* actual column */
    c1 = grn_column_create(context,t1,"c1",2,"mrn/t1.c1.grn",
                           GRN_OBJ_COLUMN_SCALAR|GRN_OBJ_PERSISTENT,
                           grn_ctx_at(context, GRN_DB_TEXT));
    cut_assert_not_null(c1);

    /* fulltext index */
    ft = grn_column_create(context,lc,"ft",2,"mrn/lc.ft.grn",
                           GRN_OBJ_COLUMN_INDEX|GRN_OBJ_PERSISTENT,t1);
    cut_assert_not_null(ft);

    GRN_TEXT_INIT(&buff,0);

    /* link between actual column and fulltext index */
    GRN_UINT32_SET(context, &buff, grn_obj_id(context, c1));
    grn_obj_set_info(context, ft, GRN_INFO_SOURCE, &buff); /* need to use grn_id */

    /* insert row */
    r1 = grn_table_add(context, t1, NULL, 0, NULL);
    cut_assert_equal_int(1,r1);
    GRN_TEXT_SETS(context, &buff, "abcde");
    grn_test_assert(grn_obj_set_value(context, c1, r1, &buff, GRN_OBJ_SET));

    r2 = grn_table_add(context, t1, NULL, 0, NULL);
    cut_assert_equal_int(2,r2);
    GRN_TEXT_SETS(context, &buff, "fghij");
    grn_test_assert(grn_obj_set_value(context, c1, r2, &buff, GRN_OBJ_SET));

    r3 = grn_table_add(context, t1, NULL, 0, NULL);
    cut_assert_equal_int(3,r3);
    GRN_TEXT_SETS(context, &buff, "11 22 33");
    grn_test_assert(grn_obj_set_value(context, c1, r3, &buff, GRN_OBJ_SET));

    r4 = grn_table_add(context, t1, NULL, 0, NULL);
    cut_assert_equal_int(4,r4);
    GRN_TEXT_SETS(context, &buff, "44 22 55");
    grn_test_assert(grn_obj_set_value(context, c1, r4, &buff, GRN_OBJ_SET));

    /* confirm record are inserted in both column and index */
    cut_assert_equal_int(4,grn_table_size(context,t1));
    cut_assert_equal_int(23,grn_table_size(context,lc));

    /* nlq search */
    {
        grn_id id, docid;
        grn_obj *res;
        grn_table_cursor *tc;
        grn_obj score, *score_column;
        res = grn_table_create(context, NULL, 0, NULL,
                               GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, t1, 0);
        GRN_UINT32_INIT(&score, 0);
        GRN_BULK_REWIND(&buff);
        GRN_TEXT_SETS(context, &buff, "hij");
        grn_obj_search(context, ft, &buff, res, GRN_OP_OR, NULL);
        cut_assert_equal_int(1, grn_table_size(context, res));
        score_column = grn_obj_column(context, res, ".:score", 7);
        tc = grn_table_cursor_open(context, res, NULL, 0, NULL, 0, 0, 0, 0);
        while ((id = grn_table_cursor_next(context, tc))) {
            GRN_BULK_REWIND(&buff);
            grn_table_get_key(context, res, id, &docid, sizeof(grn_id));
            cut_assert_equal_int(2, docid);
            cut_assert_not_null(grn_obj_get_value(context, c1, docid, &buff));
            cut_assert_equal_int(5 ,GRN_TEXT_LEN(&buff));
            cut_assert_equal_substring("fghij", (char*) GRN_BULK_HEAD(&buff),GRN_TEXT_LEN(&buff));
            grn_obj_get_value(context, score_column, id, &score);
            cut_assert_equal_uint(1, GRN_UINT32_VALUE(&score));
        }
        grn_table_cursor_close(context, tc);
        grn_obj_close(context, score_column);
        grn_obj_close(context, res);
    }

    /* boolean search */
    {
        grn_id id, docid;
        grn_obj *res;
        grn_query *query;
        grn_table_cursor *tc;
        grn_obj score, *score_column;
        const char *qstr = "+22 -55";
        res = grn_table_create(context, NULL, 0, NULL,
                               GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, t1, 0);
        GRN_UINT32_INIT(&score, 0);
        query = grn_query_open(context, qstr, strlen(qstr), GRN_OP_OR, 32);
        grn_obj_search(context, ft, (grn_obj*) query, res, GRN_OP_OR, NULL);
        cut_assert_equal_int(1, grn_table_size(context, res));
        score_column = grn_obj_column(context, res, ".:score", 7);
        tc = grn_table_cursor_open(context, res, NULL, 0, NULL, 0, 0, 0, 0);
        while ((id = grn_table_cursor_next(context, tc))) {
            GRN_BULK_REWIND(&buff);
            grn_table_get_key(context, res, id, &docid, sizeof(grn_id));
            cut_assert_equal_int(3, docid);
            cut_assert_not_null(grn_obj_get_value(context, c1, docid, &buff));
            cut_assert_equal_int(8 ,GRN_TEXT_LEN(&buff));
            cut_assert_equal_substring("11 22 33", (char*) GRN_BULK_HEAD(&buff),GRN_TEXT_LEN(&buff));
            grn_obj_get_value(context, score_column, id, &score);
            cut_assert_equal_uint(5, GRN_UINT32_VALUE(&score));
        }
        grn_query_close(context, query);
        grn_table_cursor_close(context ,tc);
        grn_obj_close(context, score_column);
        grn_obj_close(context, res);
    }

    grn_obj_close(context, &buff);
    grn_obj_close(context, ft);
    grn_obj_close(context, c1);
    grn_obj_close(context, lc);
    grn_obj_close(context, t1);
}
Beispiel #18
0
static grn_obj *
create_lexicon_for_tokenize(grn_ctx *ctx,
                            grn_obj *tokenizer_name,
                            grn_obj *normalizer_name,
                            grn_obj *token_filter_names)
{
  grn_obj *lexicon;
  grn_obj *tokenizer;
  grn_obj *normalizer = NULL;

  tokenizer = grn_ctx_get(ctx,
                          GRN_TEXT_VALUE(tokenizer_name),
                          GRN_TEXT_LEN(tokenizer_name));
  if (!tokenizer) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "[tokenize] nonexistent tokenizer: <%.*s>",
                     (int)GRN_TEXT_LEN(tokenizer_name),
                     GRN_TEXT_VALUE(tokenizer_name));
    return NULL;
  }

  if (!grn_obj_is_tokenizer_proc(ctx, tokenizer)) {
    grn_obj inspected;
    GRN_TEXT_INIT(&inspected, 0);
    grn_inspect(ctx, &inspected, tokenizer);
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "[tokenize] not tokenizer: %.*s",
                     (int)GRN_TEXT_LEN(&inspected),
                     GRN_TEXT_VALUE(&inspected));
    GRN_OBJ_FIN(ctx, &inspected);
    grn_obj_unlink(ctx, tokenizer);
    return NULL;
  }

  if (GRN_TEXT_LEN(normalizer_name) > 0) {
    normalizer = grn_ctx_get(ctx,
                             GRN_TEXT_VALUE(normalizer_name),
                             GRN_TEXT_LEN(normalizer_name));
    if (!normalizer) {
      grn_obj_unlink(ctx, tokenizer);
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "[tokenize] nonexistent normalizer: <%.*s>",
                       (int)GRN_TEXT_LEN(normalizer_name),
                       GRN_TEXT_VALUE(normalizer_name));
      return NULL;
    }

    if (!grn_obj_is_normalizer_proc(ctx, normalizer)) {
      grn_obj inspected;
      grn_obj_unlink(ctx, tokenizer);
      GRN_TEXT_INIT(&inspected, 0);
      grn_inspect(ctx, &inspected, normalizer);
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "[tokenize] not normalizer: %.*s",
                       (int)GRN_TEXT_LEN(&inspected),
                       GRN_TEXT_VALUE(&inspected));
      GRN_OBJ_FIN(ctx, &inspected);
      grn_obj_unlink(ctx, normalizer);
      return NULL;
    }
  }

  lexicon = grn_table_create(ctx, NULL, 0,
                             NULL,
                             GRN_OBJ_TABLE_HASH_KEY,
                             grn_ctx_at(ctx, GRN_DB_SHORT_TEXT),
                             NULL);
  grn_obj_set_info(ctx, lexicon,
                   GRN_INFO_DEFAULT_TOKENIZER, tokenizer);
  grn_obj_unlink(ctx, tokenizer);
  if (normalizer) {
    grn_obj_set_info(ctx, lexicon,
                     GRN_INFO_NORMALIZER, normalizer);
    grn_obj_unlink(ctx, normalizer);
  }
  grn_proc_table_set_token_filters(ctx, lexicon, token_filter_names);

  return lexicon;
}
Beispiel #19
0
void
test_mroonga_index_score(void)
{
  grn_obj *t1,*c1,*lc,*ft;
  grn_obj buff;
  grn_id r1,r2,r3,r4;
  const gchar *mrn_dir;

  mrn_dir = cut_build_path(tmp_directory, "mrn", NULL);
  g_mkdir_with_parents(mrn_dir, 0700);

  grn_obj_close(context, db);
  db = grn_db_create(context,
                     cut_build_path(mrn_dir, "mroonga.grn", NULL),
                     NULL);
  cut_assert_not_null(db);

  /* actual table */
  t1 = grn_table_create(context, "t1", 2,
                        cut_build_path(mrn_dir, "t1.grn", NULL),
			GRN_OBJ_TABLE_NO_KEY|GRN_OBJ_PERSISTENT, NULL, 0);
  cut_assert_not_null(t1);

  /* lexicon table */
  lc = grn_table_create(context, "lc", 2,
                        cut_build_path(mrn_dir, "lc.grn", NULL),
			GRN_OBJ_TABLE_PAT_KEY|GRN_OBJ_PERSISTENT,
                        grn_ctx_at(context, GRN_DB_SHORT_TEXT), 0);
  cut_assert_not_null(lc);
  grn_test_assert(grn_obj_set_info(context, lc, GRN_INFO_DEFAULT_TOKENIZER,
				   grn_ctx_at(context, GRN_DB_BIGRAM)));

  /* actual column */
  c1 = grn_column_create(context, t1, "c1", 2,
                         cut_build_path(mrn_dir, "t1.c1.grn", NULL),
			 GRN_OBJ_COLUMN_SCALAR|GRN_OBJ_PERSISTENT,
			 grn_ctx_at(context, GRN_DB_TEXT));
  cut_assert_not_null(c1);

  /* fulltext index */
  ft = grn_column_create(context, lc, "ft", 2,
                         cut_build_path(mrn_dir, "lc.ft.grn", NULL),
			 GRN_OBJ_COLUMN_INDEX|GRN_OBJ_PERSISTENT, t1);
  cut_assert_not_null(ft);

  GRN_TEXT_INIT(&buff,0);

  /* link between actual column and fulltext index */
  GRN_UINT32_SET(context, &buff, grn_obj_id(context, c1));
  grn_obj_set_info(context, ft, GRN_INFO_SOURCE, &buff); /* need to use grn_id */

  /* insert row */
  r1 = grn_table_add(context, t1, NULL, 0, NULL);
  cut_assert_equal_int(1,r1);
  GRN_TEXT_SETS(context, &buff, "abcde");
  grn_test_assert(grn_obj_set_value(context, c1, r1, &buff, GRN_OBJ_SET));

  r2 = grn_table_add(context, t1, NULL, 0, NULL);
  cut_assert_equal_int(2,r2);
  GRN_TEXT_SETS(context, &buff, "fghij");
  grn_test_assert(grn_obj_set_value(context, c1, r2, &buff, GRN_OBJ_SET));

  r3 = grn_table_add(context, t1, NULL, 0, NULL);
  cut_assert_equal_int(3,r3);
  GRN_TEXT_SETS(context, &buff, "11 22 33");
  grn_test_assert(grn_obj_set_value(context, c1, r3, &buff, GRN_OBJ_SET));

  r4 = grn_table_add(context, t1, NULL, 0, NULL);
  cut_assert_equal_int(4,r4);
  GRN_TEXT_SETS(context, &buff, "44 22 55");
  grn_test_assert(grn_obj_set_value(context, c1, r4, &buff, GRN_OBJ_SET));

  /* confirm record are inserted in both column and index */
  cut_assert_equal_int(4,grn_table_size(context,t1));
  cut_assert_equal_int(23,grn_table_size(context,lc));

  /* nlq search */
  {
    grn_id id, docid;
    grn_obj *res;
    grn_table_cursor *tc;
    grn_obj score, *score_column;
    res = grn_table_create(context, NULL, 0, NULL,
                           GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, t1, 0);
    GRN_FLOAT_INIT(&score, 0);
    GRN_BULK_REWIND(&buff);
    GRN_TEXT_SETS(context, &buff, "hij");
    grn_obj_search(context, ft, &buff, res, GRN_OP_OR, NULL);
    cut_assert_equal_int(1, grn_table_size(context, res));
    score_column = grn_obj_column(context, res, "_score", 6);
    tc = grn_table_cursor_open(context, res, NULL, 0, NULL, 0, 0, -1, 0);
    while ((id = grn_table_cursor_next(context, tc))) {
      GRN_BULK_REWIND(&buff);
      grn_table_get_key(context, res, id, &docid, sizeof(grn_id));
      cut_assert_equal_int(2, docid);
      cut_assert_not_null(grn_obj_get_value(context, c1, docid, &buff));
      cut_assert_equal_int(5 ,GRN_TEXT_LEN(&buff));
      cut_assert_equal_substring("fghij", (char*) GRN_BULK_HEAD(&buff),GRN_TEXT_LEN(&buff));
      grn_obj_get_value(context, score_column, id, &score);
      cut_assert_equal_double(1.0, DBL_EPSILON, GRN_FLOAT_VALUE(&score));
    }
    grn_table_cursor_close(context, tc);
    grn_obj_close(context, score_column);
    grn_obj_close(context, res);
  }

  /* boolean search */
  {
    grn_id id, docid;
    grn_obj *res;
    grn_obj *match_columns, *match_columns_variable;
    grn_obj *expression, *expression_variable;
    grn_table_cursor *tc;
    grn_obj score, *score_column;
    const char *match_columns_expression = "c1 * 5";
    const char *qstr = "+22 -55";

    GRN_EXPR_CREATE_FOR_QUERY(context, t1,
                              match_columns, match_columns_variable);
    grn_expr_parse(context, match_columns,
                   match_columns_expression,
                   strlen(match_columns_expression),
                   NULL, GRN_OP_MATCH, GRN_OP_AND,
                   GRN_EXPR_SYNTAX_SCRIPT);
    GRN_EXPR_CREATE_FOR_QUERY(context, t1, expression, expression_variable);
    res = grn_table_create(context, NULL, 0, NULL,
                           GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, t1, 0);
    grn_test_assert(grn_expr_parse(context, expression,
                                   qstr, strlen(qstr),
                                   match_columns,
                                   GRN_OP_MATCH, GRN_OP_OR,
                                   GRN_EXPR_SYNTAX_QUERY));
    grn_table_select(context, t1, expression, res, GRN_OP_OR);
    cut_assert_equal_int(1, grn_table_size(context, res));
    GRN_FLOAT_INIT(&score, 0);
    score_column = grn_obj_column(context, res, "_score", 6);
    tc = grn_table_cursor_open(context, res, NULL, 0, NULL, 0, 0, -1, 0);
    while ((id = grn_table_cursor_next(context, tc))) {
      GRN_BULK_REWIND(&buff);
      grn_table_get_key(context, res, id, &docid, sizeof(grn_id));
      cut_assert_equal_int(3, docid);
      cut_assert_not_null(grn_obj_get_value(context, c1, docid, &buff));
      cut_assert_equal_int(8, GRN_TEXT_LEN(&buff));
      cut_assert_equal_substring("11 22 33", (char*) GRN_BULK_HEAD(&buff),GRN_TEXT_LEN(&buff));
      grn_obj_get_value(context, score_column, id, &score);
      cut_assert_equal_double(5, DBL_EPSILON, GRN_FLOAT_VALUE(&score));
    }
    grn_obj_close(context, expression);
    grn_obj_close(context, match_columns);
    grn_table_cursor_close(context ,tc);
    grn_obj_close(context, score_column);
    grn_obj_close(context, res);
  }

  grn_obj_close(context, &buff);
  grn_obj_close(context, ft);
  grn_obj_close(context, c1);
  grn_obj_close(context, lc);
  grn_obj_close(context, t1);
}