예제 #1
0
/*
 * Data-driven check of normalization with
 * GRN_STRING_REMOVE_TOKENIZED_DELIMITER.
 *
 * Reads "input", "flags" and "expected" from the test data, opens a grn
 * string with GRN_STRING_REMOVE_TOKENIZED_DELIMITER always set (in addition
 * to the data-provided flags) and asserts that the normalized text equals
 * "expected".
 */
void
test_remove_tokenized_delimiter(gconstpointer data)
{
  grn_obj *grn_string;
  grn_obj *normalizer;
  const gchar *source;
  const gchar *actual;
  const gchar *wanted;
  unsigned int actual_size;
  int open_flags;

  GRN_CTX_SET_ENCODING(&context, GRN_ENC_UTF8);

  source = gcut_data_get_string(data, "input");
  open_flags = GRN_STRING_REMOVE_TOKENIZED_DELIMITER;
  open_flags |= gcut_data_get_int(data, "flags");
  /* The automatic normalizer is requested only when the flags ask for it. */
  normalizer =
    (open_flags & GRN_OBJ_KEY_NORMALIZE) ? GRN_NORMALIZER_AUTO : NULL;

  grn_string = grn_string_open(&context, source, strlen(source),
                               normalizer, open_flags);
  grn_string_get_normalized(&context, grn_string,
                            &actual, &actual_size, NULL);
  /* Work on a copy so the grn string can be released before asserting. */
  actual = cut_take_strndup(actual, actual_size);
  grn_obj_unlink(&context, grn_string);

  wanted = gcut_data_get_string(data, "expected");
  cut_assert_equal_string(wanted, actual);
}
예제 #2
0
파일: test-string.c 프로젝트: mooz/groonga
/*
 * Data-driven normalization test for the grn_str API.
 *
 * The "input" and "expected" data are converted with convert_encoding() to
 * the encoding named by "encoding"; the normalized text and its byte length
 * must match the converted expectation.
 */
void
test_normalize(gconstpointer data)
{
  grn_encoding target_encoding;
  grn_str *str;
  const gchar *input_utf8, *input_bytes;
  const gchar *expected_utf8, *expected_bytes;
  const gchar *actual;
  guint actual_blen;
  int open_flags;

  target_encoding = gcut_data_get_int(data, "encoding");
  GRN_CTX_SET_ENCODING(&context, target_encoding);

  open_flags = GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS | GRN_STR_WITH_CTYPES;
  input_utf8 = gcut_data_get_string(data, "input");
  input_bytes = convert_encoding(input_utf8, target_encoding);
  str = grn_str_open(&context, input_bytes, strlen(input_bytes), open_flags);

  /* Copy the result out before closing so the assertions below never touch
     the closed string. */
  actual = cut_take_strndup(str->norm, str->norm_blen);
  actual_blen = str->norm_blen;
  grn_test_assert(grn_str_close(&context, str));

  expected_utf8 = gcut_data_get_string(data, "expected");
  expected_bytes = convert_encoding(expected_utf8, target_encoding);
  cut_assert_equal_string(expected_bytes, actual);
  cut_assert_equal_int(strlen(expected_bytes), actual_blen);
}
예제 #3
0
/*
 * Prepares the shared context, key type and persistent table for one
 * performance run, exports the table path and type through environment
 * variables, and then delegates to run().
 *
 * Returns whatever run() returns.
 */
static gboolean
run_test(const gchar **test_case_names, const grn_test_data *data)
{
  const gchar *key_type_name = "name";
  const gchar *label;
  gchar *table_path;

  grn_test_assert(grn_ctx_init(context, GRN_CTX_USE_QL));
  GRN_CTX_SET_ENCODING(context, GRN_ENC_UTF8);

  type = grn_type_create(context, key_type_name, strlen(key_type_name),
                         GRN_OBJ_KEY_UINT, sizeof(grn_id));

  table_path = g_build_filename(base_dir, "table", NULL);
  g_setenv(GRN_TEST_ENV_TABLE_PATH, table_path, TRUE);
  g_setenv(GRN_TEST_ENV_TABLE_TYPE, data->type_name, TRUE);

  label = cut_take_printf("%s: performance-read-write", data->type_name);
  table = grn_table_create(context,
                           label, strlen(label),
                           table_path,
                           GRN_OBJ_PERSISTENT | data->flags,
                           type,
                           NULL);
  /* Free the path before asserting on the table. */
  g_free(table_path);
  cut_assert_not_null(table);

  return run(test_case_names, data);
}
예제 #4
0
/*
 * Data-driven test asserting that normalizing the given input yields an
 * empty result: empty text, zero bytes and zero characters.
 *
 * The context encoding ("context-encoding") and the encoding the input is
 * converted to ("input-encoding") are read separately from the test data.
 * A negative "input-length" means "use the whole converted input".
 */
void
test_normalize_broken(gconstpointer data)
{
  grn_encoding ctx_encoding, source_encoding;
  grn_obj *grn_string;
  const gchar *source, *source_bytes;
  const gchar *actual;
  gint n_source_bytes;
  guint actual_size, actual_n_chars;
  int open_flags;

  ctx_encoding = gcut_data_get_int(data, "context-encoding");
  GRN_CTX_SET_ENCODING(&context, ctx_encoding);

  source = gcut_data_get_string(data, "input");
  source_encoding = gcut_data_get_int(data, "input-encoding");
  n_source_bytes = gcut_data_get_int(data, "input-length");
  source_bytes = convert_encoding(source, source_encoding);
  if (n_source_bytes < 0) {
    n_source_bytes = strlen(source_bytes);
  }

  open_flags = GRN_STRING_WITH_CHECKS | GRN_STRING_WITH_TYPES;
  grn_string = grn_string_open(&context,
                               source_bytes,
                               n_source_bytes,
                               GRN_NORMALIZER_AUTO,
                               open_flags);
  grn_string_get_normalized(&context, grn_string,
                            &actual, &actual_size, &actual_n_chars);
  /* Work on a copy so the grn string can be released before asserting. */
  actual = cut_take_strndup(actual, actual_size);
  grn_obj_unlink(&context, grn_string);

  cut_assert_equal_string("", actual);
  cut_assert_equal_int(0, actual_size);
  cut_assert_equal_int(0, actual_n_chars);
}
예제 #5
0
 // Opens a grn string for `string` (`string_length` bytes) using the
 // normalizer returned by find_grn_normalizer().
 //
 // The context encoding is switched to the field's charset only for the
 // duration of grn_string_open() and restored afterwards. The result of
 // grn_string_open() is returned as-is.
 grn_obj *FieldNormalizer::normalize(const char *string,
                                     unsigned int string_length) {
   MRN_DBUG_ENTER_METHOD();
   const int no_flags = 0;
   grn_obj *field_normalizer = find_grn_normalizer();
   const grn_encoding saved_encoding = GRN_CTX_GET_ENCODING(ctx_);

   encoding::set(ctx_, field_->charset());
   grn_obj *normalized = grn_string_open(ctx_,
                                         string,
                                         string_length,
                                         field_normalizer,
                                         no_flags);
   GRN_CTX_SET_ENCODING(ctx_, saved_encoding);

   DBUG_RETURN(normalized);
 }
예제 #6
0
/*
 * call-seq:
 *   context.encoding=(encoding)
 *
 * Sets the encoding used by the context. See Groonga::Encoding
 * for how to specify an encoding.
 */
static VALUE
rb_grn_context_set_encoding (VALUE self, VALUE rb_encoding)
{
    grn_ctx *context = SELF(self);
    /* Convert once into a local before handing the value to the macro. */
    grn_encoding encoding = RVAL2GRNENCODING(rb_encoding, NULL);

    GRN_CTX_SET_ENCODING(context, encoding);

    return rb_encoding;
}
예제 #7
0
void
cut_setup(void)
{
    gchar *table_path, *vgram_path;
    const gchar *type_name, *table_name;

    cut_set_fixture_data_dir(grn_test_get_base_dir(),
                             "fixtures",
                             "inverted-index",
                             NULL);

    logger = setup_grn_logger();

    expected_messages = NULL;
    record_ids = NULL;

    remove_tmp_directory();
    g_mkdir_with_parents(tmp_directory, 0700);
    path = g_build_filename(tmp_directory, "inverted-index", NULL);

    context = g_new0(grn_ctx, 1);
    grn_test_assert(grn_ctx_init(context, GRN_CTX_USE_QL));
    GRN_CTX_SET_ENCODING(context, GRN_ENC_UTF8);

    db = grn_db_create(context, NULL, NULL);
    grn_ctx_use(context, db);

    type_name = "name";
    type = grn_type_create(context, type_name, strlen(type_name),
                           GRN_OBJ_KEY_VAR_SIZE, TYPE_SIZE);

    table_name = "lexicon";
    table_path = g_build_filename(tmp_directory, "lexicon-table", NULL);
    lexicon = grn_table_create(context,
                               table_name, strlen(table_name),
                               table_path,
                               GRN_OBJ_PERSISTENT | GRN_OBJ_TABLE_PAT_KEY,
                               type, NULL);

    grn_obj_set_info(context, lexicon, GRN_INFO_DEFAULT_TOKENIZER,
                     grn_ctx_at(context, GRN_DB_BIGRAM));

    g_free(table_path);

    vgram_path = g_build_filename(tmp_directory, "vgram", NULL);
    /*
      vgram = grn_vgram_create(vgram_path);
    */
    g_free(vgram_path);

    inverted_index = NULL;
}
예제 #8
0
/*
 * Creates a table from the settings stored in `factory`.
 *
 * The value type named VALUE_TYPE_NAME is looked up in the factory's
 * context and created with `factory->value_size` when it does not exist
 * yet. The context encoding is set to `factory->encoding` before the table
 * is created. Returns the result of grn_table_create().
 */
static grn_obj *
grn_table_factory_make(grn_table_factory *factory)
{
  grn_obj *value_type;
  grn_obj *created;

  value_type = grn_ctx_get(factory->context,
                           VALUE_TYPE_NAME, strlen(VALUE_TYPE_NAME));
  if (value_type == NULL) {
    value_type = grn_type_create(factory->context,
                                 VALUE_TYPE_NAME, strlen(VALUE_TYPE_NAME),
                                 0, factory->value_size);
  }

  GRN_CTX_SET_ENCODING(factory->context, factory->encoding);
  created = grn_table_create(factory->context,
                             factory->name,
                             factory->name_size,
                             factory->path,
                             factory->flags,
                             factory->key_type,
                             value_type);
  return created;
}
예제 #9
0
static grn_obj *
open_snip(void)
{
  if (snip) {
    grn_obj_close(&context, (grn_obj *)snip);
  }
  GRN_CTX_SET_ENCODING(&context, default_encoding);
  snip = grn_snip_open(&context, default_flags,
                       default_width,  default_max_results,
                       default_open_tag, default_open_tag_len,
                       default_close_tag, default_close_tag_len,
                       default_mapping);
  return snip;
}
예제 #10
0
/*
 * Benchmark body: creates a temporary (non-persistent) table from the
 * settings in `user_data` (a BenchmarkData) and closes it right away.
 *
 * The value type named VALUE_TYPE_NAME is created on demand when the
 * context does not have it yet.
 */
static void
bench_normal_temporary(gpointer user_data)
{
  BenchmarkData *bench = user_data;
  grn_obj *value_type;
  grn_obj *temporary_table;

  value_type = grn_ctx_get(bench->context,
                           VALUE_TYPE_NAME, strlen(VALUE_TYPE_NAME));
  if (value_type == NULL) {
    value_type = grn_type_create(bench->context,
                                 VALUE_TYPE_NAME, strlen(VALUE_TYPE_NAME),
                                 0, bench->value_size);
  }

  GRN_CTX_SET_ENCODING(bench->context, bench->encoding);
  /* No path and GRN_OBJ_PERSISTENT masked out: the table is temporary. */
  temporary_table = grn_table_create(bench->context,
                                     bench->name,
                                     bench->name_size,
                                     NULL,
                                     bench->flags & ~GRN_OBJ_PERSISTENT,
                                     bench->key_type,
                                     value_type);
  grn_obj_close(bench->context, temporary_table);
}
예제 #11
0
/*
 * call-seq:
 *   Groonga::Context.new(options=nil)
 *
 * Creates a context. The following values can be given in
 * _options_.
 *
 * [+:encoding+]
 *   Specifies the encoding. See Groonga::Encoding for how to
 *   specify an encoding.
 */
static VALUE
rb_grn_context_initialize (int argc, VALUE *argv, VALUE self)
{
    RbGrnContext *wrapper;
    grn_ctx *context;
    int flags = 0;
    VALUE options, default_options;
    VALUE rb_encoding;

    rb_scan_args(argc, argv, "01", &options);

    default_options = rb_grn_context_s_get_default_options(rb_obj_class(self));
    if (NIL_P(default_options)) {
	default_options = rb_hash_new();
    }
    if (NIL_P(options)) {
	options = rb_hash_new();
    }
    /* Merge the given options onto the class-level defaults. */
    options = rb_funcall(default_options, rb_intern("merge"), 1, options);

    rb_grn_scan_options(options,
			"encoding", &rb_encoding,
			NULL);

    /* Attach the wrapper to self before the context is opened. */
    wrapper = ALLOC(RbGrnContext);
    DATA_PTR(self) = wrapper;
    wrapper->self = self;
    context = grn_ctx_open(flags);
    wrapper->context = context;
    rb_grn_context_check(context, self);

    GRN_CTX_USER_DATA(context)->ptr = wrapper;

    if (!NIL_P(rb_encoding)) {
	grn_encoding encoding = RVAL2GRNENCODING(rb_encoding, NULL);

	GRN_CTX_SET_ENCODING(context, encoding);
    }

    debug("context new: %p\n", context);

    return Qnil;
}
예제 #12
0
파일: test-string.c 프로젝트: mooz/groonga
/*
 * Data-driven test asserting that grn_charlen() returns 0 for the given
 * input in the given encoding.
 *
 * A negative "input-length" means "use the whole converted input";
 * otherwise only that many bytes are handed to grn_charlen().
 */
void
test_charlen_broken(gconstpointer data)
{
  grn_encoding target_encoding;
  const gchar *source, *source_bytes, *source_bytes_end;
  gint n_source_bytes;

  target_encoding = gcut_data_get_int(data, "encoding");
  GRN_CTX_SET_ENCODING(&context, target_encoding);

  source = gcut_data_get_string(data, "input");
  n_source_bytes = gcut_data_get_int(data, "input-length");
  source_bytes = convert_encoding(source, target_encoding);
  if (n_source_bytes < 0) {
    n_source_bytes = strlen(source_bytes);
  }

  source_bytes_end = source_bytes + n_source_bytes;
  cut_assert_equal_uint(0,
                        grn_charlen(&context, source_bytes, source_bytes_end));
}
예제 #13
0
void
test_normalize_without_database(void)
{
  grn_obj *string;
  const char *input = "Groonga";
  int flags = 0;

  grn_obj_close(&context, database);
  database = NULL;

  GRN_CTX_SET_ENCODING(&context, GRN_ENC_UTF8);
  string = grn_string_open(&context,
                           input,
                           strlen(input),
                           GRN_NORMALIZER_AUTO,
                           flags);
  cut_assert_null(string);
  grn_test_assert_error(GRN_INVALID_ARGUMENT,
                        "[string][open] "
                        "NormalizerAuto normalizer isn't available",
                        &context);
}
예제 #14
0
 // Opens a grn string for `string` (`string_length` bytes) using the
 // normalizer spec written by find_grn_normalizer().
 //
 // A spec whose last character is ')' is attached to a lazily created
 // temporary lexicon table (GRN_INFO_NORMALIZER), and that table is passed
 // to grn_string_open(). Any other spec is looked up with grn_ctx_get() and
 // the result is passed to grn_string_open() directly.
 //
 // The context encoding is switched to the field's charset while the string
 // is opened and restored before returning. The result of grn_string_open()
 // is returned as-is.
 grn_obj *FieldNormalizer::normalize(const char *string,
                                     unsigned int string_length) {
   MRN_DBUG_ENTER_METHOD();
   grn_obj normalizer;
   GRN_TEXT_INIT(&normalizer, 0);
   find_grn_normalizer(&normalizer);
   int flags = 0;
   grn_encoding original_encoding = GRN_CTX_GET_ENCODING(ctx_);
   encoding::set_raw(ctx_, field_->charset());
   grn_obj *grn_string;
   // Check the length first: with an empty spec, indexing [length - 1]
   // would read one byte before the start of the buffer.
   if (GRN_TEXT_LEN(&normalizer) > 0 &&
       GRN_TEXT_VALUE(&normalizer)[GRN_TEXT_LEN(&normalizer) - 1] == ')') {
     if (!lexicon_) {
       // Created once and reused by later calls.
       lexicon_ = grn_table_create(ctx_,
                                   NULL, 0,
                                   NULL,
                                   GRN_OBJ_TABLE_PAT_KEY,
                                   grn_ctx_at(ctx_, GRN_DB_SHORT_TEXT),
                                   NULL);
     }
     grn_obj_set_info(ctx_, lexicon_, GRN_INFO_NORMALIZER, &normalizer);
     grn_string = grn_string_open(ctx_,
                                  string,
                                  string_length,
                                  lexicon_,
                                  flags);
   } else {
     grn_string = grn_string_open(ctx_,
                                  string,
                                  string_length,
                                  grn_ctx_get(ctx_,
                                              GRN_TEXT_VALUE(&normalizer),
                                              GRN_TEXT_LEN(&normalizer)),
                                  flags);
   }
   GRN_OBJ_FIN(ctx_, &normalizer);
   GRN_CTX_SET_ENCODING(ctx_, original_encoding);
   DBUG_RETURN(grn_string);
 }
예제 #15
0
파일: test-string.c 프로젝트: mooz/groonga
/*
 * Data-driven test asserting that normalizing the given input with the
 * grn_str API yields an empty result (empty text, zero bytes).
 *
 * The context encoding ("context-encoding") and the encoding the input is
 * converted to ("input-encoding") are read separately from the test data.
 * A negative "input-length" means "use the whole converted input".
 */
void
test_normalize_broken(gconstpointer data)
{
  grn_str *string;
  const gchar *input, *encoded_input;
  const gchar *normalized_text;
  unsigned int normalized_text_length;
  grn_encoding input_encoding, context_encoding;
  gint input_length;
  int flags = GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS | GRN_STR_WITH_CTYPES;

  context_encoding = gcut_data_get_int(data, "context-encoding");
  GRN_CTX_SET_ENCODING(&context, context_encoding);

  input = gcut_data_get_string(data, "input");
  input_encoding = gcut_data_get_int(data, "input-encoding");
  input_length = gcut_data_get_int(data, "input-length");
  encoded_input = convert_encoding(input, input_encoding);
  if (input_length < 0) {
    input_length = strlen(encoded_input);
  }
  string = grn_str_open(&context, encoded_input, input_length, flags);
  /* Fail cleanly instead of dereferencing NULL when opening fails. */
  cut_assert_not_null(string);

  /* Copy the result and close the string before asserting, so a failing
     assertion cannot leave the grn_str open. */
  normalized_text = cut_take_strdup(string->norm);
  normalized_text_length = string->norm_blen;
  grn_test_assert(grn_str_close(&context, string));

  cut_assert_equal_string("", normalized_text);
  cut_assert_equal_int(0, normalized_text_length);
}
예제 #16
0
/*
 * Data-driven normalization test for the grn_string API.
 *
 * The "input" and "expected" data are converted with convert_encoding() to
 * the encoding named by "encoding". The normalized text, its byte length
 * and its character count (compared with g_utf8_strlen() of the UTF-8
 * expectation) must all match.
 */
void
test_normalize(gconstpointer data)
{
  grn_encoding target_encoding;
  grn_obj *grn_string;
  const gchar *input_utf8, *input_bytes;
  const gchar *expected_utf8, *expected_bytes;
  const gchar *actual;
  guint actual_size;
  guint actual_n_chars;
  int open_flags;

  target_encoding = gcut_data_get_int(data, "encoding");
  GRN_CTX_SET_ENCODING(&context, target_encoding);

  open_flags = GRN_STRING_WITH_CHECKS | GRN_STRING_WITH_TYPES;
  input_utf8 = gcut_data_get_string(data, "input");
  input_bytes = convert_encoding(input_utf8, target_encoding);
  grn_string = grn_string_open(&context, input_bytes, strlen(input_bytes),
                               GRN_NORMALIZER_AUTO, open_flags);
  grn_string_get_normalized(&context, grn_string,
                            &actual, &actual_size, &actual_n_chars);
  /* Work on a copy so the grn string can be released before asserting. */
  actual = cut_take_strndup(actual, actual_size);
  grn_obj_unlink(&context, grn_string);

  expected_utf8 = gcut_data_get_string(data, "expected");
  expected_bytes = convert_encoding(expected_utf8, target_encoding);
  cut_assert_equal_string(expected_bytes, actual);
  cut_assert_equal_uint(strlen(expected_bytes), actual_size);
  cut_assert_equal_uint(g_utf8_strlen(expected_utf8, -1), actual_n_chars);
}
예제 #17
0
 // Sets the encoding of `ctx` according to the MySQL `charset`.
 //
 // A NULL charset selects GRN_ENC_NONE. Otherwise the charset's `cset` is
 // compared against the known mrn_charset_* handlers, and the first match
 // decides the encoding. The comparison order is significant and must be
 // kept.
 //
 // Returns 0 on success. For an unknown charset the encoding is set to
 // GRN_ENC_NONE, an error is reported through my_printf_error() and
 // ER_MRN_CHARSET_NOT_SUPPORT_NUM is returned.
 int set(grn_ctx *ctx, const CHARSET_INFO *charset) {
   MRN_DBUG_ENTER_FUNCTION();
   if (!charset) {
     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_NONE);
   } else if (charset->cset == mrn_charset_utf8->cset ||
              (mrn_charset_utf8mb4 &&
               charset->cset == mrn_charset_utf8mb4->cset)) {
     // utf8mb4 is compared only when its charset pointer is set.
     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_UTF8);
   } else if (charset->cset == mrn_charset_cp932->cset) {
     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_SJIS);
   } else if (charset->cset == mrn_charset_eucjpms->cset) {
     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_EUC_JP);
   } else if (charset->cset == mrn_charset_latin1_1->cset ||
              charset->cset == mrn_charset_latin1_2->cset) {
     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_LATIN1);
   } else if (charset->cset == mrn_charset_koi8r->cset) {
     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_KOI8R);
   } else if (charset->cset == mrn_charset_binary->cset) {
     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_NONE);
   } else if (charset->cset == mrn_charset_ascii->cset) {
     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_UTF8);
   } else if (charset->cset == mrn_charset_sjis->cset) {
     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_SJIS);
   } else if (charset->cset == mrn_charset_ujis->cset) {
     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_EUC_JP);
   } else {
     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_NONE);
     my_printf_error(ER_MRN_CHARSET_NOT_SUPPORT_NUM,
       ER_MRN_CHARSET_NOT_SUPPORT_STR,
       MYF(0), charset->name, charset->csname);
     DBUG_RETURN(ER_MRN_CHARSET_NOT_SUPPORT_NUM);
   }
   DBUG_RETURN(0);
 }