/*
 * Data-driven test: opens a grn_string for the "input" datum using
 * GRN_STRING_REMOVE_TOKENIZED_DELIMITER combined with the datum's "flags",
 * then compares the normalized bytes against the "expected" datum.
 */
void
test_remove_tokenized_delimiter(gconstpointer data)
{
  const gchar *expected;
  const gchar *normalized;
  const gchar *source_text;
  grn_obj *grn_text;
  grn_obj *chosen_normalizer;
  unsigned int n_bytes;
  int open_flags;

  GRN_CTX_SET_ENCODING(&context, GRN_ENC_UTF8);

  source_text = gcut_data_get_string(data, "input");
  open_flags = GRN_STRING_REMOVE_TOKENIZED_DELIMITER |
               gcut_data_get_int(data, "flags");

  /* The auto normalizer is used only when the data asks for normalization. */
  chosen_normalizer =
    (open_flags & GRN_OBJ_KEY_NORMALIZE) ? GRN_NORMALIZER_AUTO : NULL;

  grn_text = grn_string_open(&context,
                             source_text, strlen(source_text),
                             chosen_normalizer, open_flags);
  grn_string_get_normalized(&context, grn_text,
                            &normalized, &n_bytes, NULL);
  /* Copy the result before the string object is released. */
  normalized = cut_take_strndup(normalized, n_bytes);
  grn_obj_unlink(&context, grn_text);

  expected = gcut_data_get_string(data, "expected");
  cut_assert_equal_string(expected, normalized);
}
/*
 * Data-driven test: converts the "input" and "expected" data to the
 * "encoding" under test, normalizes the input with grn_str_open() and
 * checks both the normalized text and its byte length.
 */
void
test_normalize(gconstpointer data)
{
  const gchar *expected_source, *encoded_expected;
  const gchar *input_source, *input_in_encoding;
  const gchar *normalized_text;
  guint normalized_text_len;
  grn_str *string;
  grn_encoding target_encoding;
  int open_flags;

  target_encoding = gcut_data_get_int(data, "encoding");
  GRN_CTX_SET_ENCODING(&context, target_encoding);

  open_flags = GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS | GRN_STR_WITH_CTYPES;
  input_source = gcut_data_get_string(data, "input");
  input_in_encoding = convert_encoding(input_source, target_encoding);

  string = grn_str_open(&context,
                        input_in_encoding, strlen(input_in_encoding),
                        open_flags);
  /* Capture the result before the grn_str is closed. */
  normalized_text = cut_take_strndup(string->norm, string->norm_blen);
  normalized_text_len = string->norm_blen;
  grn_test_assert(grn_str_close(&context, string));

  expected_source = gcut_data_get_string(data, "expected");
  encoded_expected = convert_encoding(expected_source, target_encoding);
  cut_assert_equal_string(encoded_expected, normalized_text);
  cut_assert_equal_int(strlen(encoded_expected), normalized_text_len);
}
static gboolean run_test(const gchar **test_case_names, const grn_test_data *data) { const gchar *type_name, *table_name; gchar *path; grn_test_assert(grn_ctx_init(context, GRN_CTX_USE_QL)); GRN_CTX_SET_ENCODING(context, GRN_ENC_UTF8); type_name = "name"; type = grn_type_create(context, type_name, strlen(type_name), GRN_OBJ_KEY_UINT, sizeof(grn_id)); path = g_build_filename(base_dir, "table", NULL); g_setenv(GRN_TEST_ENV_TABLE_PATH, path, TRUE); table_name = cut_take_printf("%s: performance-read-write", data->type_name); g_setenv(GRN_TEST_ENV_TABLE_TYPE, data->type_name, TRUE); table = grn_table_create(context, table_name, strlen(table_name), path, GRN_OBJ_PERSISTENT | data->flags, type, NULL); g_free(path); cut_assert_not_null(table); return run(test_case_names, data); }
/*
 * Data-driven test: feeds input encoded with "input-encoding" to
 * grn_string_open() while the context uses "context-encoding", and expects
 * an empty normalized result (empty text, 0 bytes, 0 characters).
 */
void
test_normalize_broken(gconstpointer data)
{
  const gchar *raw_input, *input_in_encoding;
  const gchar *normalized_text;
  guint normalized_text_length, normalized_text_n_characters;
  grn_obj *grn_text;
  grn_encoding source_encoding, ctx_encoding;
  gint n_input_bytes;
  int open_flags = GRN_STRING_WITH_CHECKS | GRN_STRING_WITH_TYPES;

  ctx_encoding = gcut_data_get_int(data, "context-encoding");
  GRN_CTX_SET_ENCODING(&context, ctx_encoding);

  raw_input = gcut_data_get_string(data, "input");
  source_encoding = gcut_data_get_int(data, "input-encoding");
  n_input_bytes = gcut_data_get_int(data, "input-length");
  input_in_encoding = convert_encoding(raw_input, source_encoding);
  /* A negative "input-length" means: use the whole encoded string. */
  if (n_input_bytes < 0) {
    n_input_bytes = strlen(input_in_encoding);
  }

  grn_text = grn_string_open(&context,
                             input_in_encoding, n_input_bytes,
                             GRN_NORMALIZER_AUTO, open_flags);
  grn_string_get_normalized(&context, grn_text,
                            &normalized_text,
                            &normalized_text_length,
                            &normalized_text_n_characters);
  /* Copy the result before the string object is released. */
  normalized_text = cut_take_strndup(normalized_text, normalized_text_length);
  grn_obj_unlink(&context, grn_text);

  cut_assert_equal_string("", normalized_text);
  cut_assert_equal_int(0, normalized_text_length);
  cut_assert_equal_int(0, normalized_text_n_characters);
}
/**
 * Opens a grn_string for `string` using the normalizer returned by
 * find_grn_normalizer().
 *
 * The context encoding is switched to the field's charset for the duration
 * of grn_string_open() and restored to its previous value afterwards.
 *
 * @param string         bytes to normalize
 * @param string_length  length of `string` in bytes
 * @return the object returned by grn_string_open()
 */
grn_obj *FieldNormalizer::normalize(const char *string,
                                    unsigned int string_length) {
  MRN_DBUG_ENTER_METHOD();

  grn_obj *field_normalizer = find_grn_normalizer();
  const int open_flags = 0;

  // Remember the current encoding so it can be put back after the call.
  grn_encoding saved_encoding = GRN_CTX_GET_ENCODING(ctx_);
  encoding::set(ctx_, field_->charset());

  grn_obj *normalized = grn_string_open(ctx_,
                                        string, string_length,
                                        field_normalizer, open_flags);

  GRN_CTX_SET_ENCODING(ctx_, saved_encoding);
  DBUG_RETURN(normalized);
}
/*
 * call-seq:
 *   context.encoding=(encoding)
 *
 * Sets the encoding used by the context. See Groonga::Encoding for how
 * to specify an encoding.
 */
static VALUE
rb_grn_context_set_encoding (VALUE self, VALUE rb_encoding)
{
    grn_ctx *ctx = SELF(self);
    /* Converted into a local first so the setter macro receives a plain
     * variable rather than a conversion expression. */
    grn_encoding new_encoding = RVAL2GRNENCODING(rb_encoding, NULL);

    GRN_CTX_SET_ENCODING(ctx, new_encoding);

    return rb_encoding;
}
void cut_setup(void) { gchar *table_path, *vgram_path; const gchar *type_name, *table_name; cut_set_fixture_data_dir(grn_test_get_base_dir(), "fixtures", "inverted-index", NULL); logger = setup_grn_logger(); expected_messages = NULL; record_ids = NULL; remove_tmp_directory(); g_mkdir_with_parents(tmp_directory, 0700); path = g_build_filename(tmp_directory, "inverted-index", NULL); context = g_new0(grn_ctx, 1); grn_test_assert(grn_ctx_init(context, GRN_CTX_USE_QL)); GRN_CTX_SET_ENCODING(context, GRN_ENC_UTF8); db = grn_db_create(context, NULL, NULL); grn_ctx_use(context, db); type_name = "name"; type = grn_type_create(context, type_name, strlen(type_name), GRN_OBJ_KEY_VAR_SIZE, TYPE_SIZE); table_name = "lexicon"; table_path = g_build_filename(tmp_directory, "lexicon-table", NULL); lexicon = grn_table_create(context, table_name, strlen(table_name), table_path, GRN_OBJ_PERSISTENT | GRN_OBJ_TABLE_PAT_KEY, type, NULL); grn_obj_set_info(context, lexicon, GRN_INFO_DEFAULT_TOKENIZER, grn_ctx_at(context, GRN_DB_BIGRAM)); g_free(table_path); vgram_path = g_build_filename(tmp_directory, "vgram", NULL); /* vgram = grn_vgram_create(vgram_path); */ g_free(vgram_path); inverted_index = NULL; }
/*
 * Creates a table from the settings stored in `factory`.
 *
 * The value type named VALUE_TYPE_NAME is looked up in the factory's
 * context and created with factory->value_size when the lookup yields
 * nothing. The context encoding is set to factory->encoding before the
 * table is created.
 *
 * Returns the result of grn_table_create().
 */
static grn_obj *
grn_table_factory_make(grn_table_factory *factory)
{
  grn_obj *created_table;
  grn_obj *cell_type;

  cell_type = grn_ctx_get(factory->context,
                          VALUE_TYPE_NAME, strlen(VALUE_TYPE_NAME));
  if (cell_type == NULL) {
    cell_type = grn_type_create(factory->context,
                                VALUE_TYPE_NAME, strlen(VALUE_TYPE_NAME),
                                0, factory->value_size);
  }

  GRN_CTX_SET_ENCODING(factory->context, factory->encoding);

  created_table = grn_table_create(factory->context,
                                   factory->name, factory->name_size,
                                   factory->path,
                                   factory->flags,
                                   factory->key_type,
                                   cell_type);
  return created_table;
}
static grn_obj * open_snip(void) { if (snip) { grn_obj_close(&context, (grn_obj *)snip); } GRN_CTX_SET_ENCODING(&context, default_encoding); snip = grn_snip_open(&context, default_flags, default_width, default_max_results, default_open_tag, default_open_tag_len, default_close_tag, default_close_tag_len, default_mapping); return snip; }
/*
 * Benchmark body: creates a table without a path and with the
 * GRN_OBJ_PERSISTENT flag masked out, then closes it straight away.
 *
 * The value type named VALUE_TYPE_NAME is looked up first and created with
 * data->value_size when the lookup yields nothing.
 */
static void
bench_normal_temporary(gpointer user_data)
{
  BenchmarkData *data = user_data;
  grn_obj *cell_type;
  grn_obj *scratch_table;

  cell_type = grn_ctx_get(data->context,
                          VALUE_TYPE_NAME, strlen(VALUE_TYPE_NAME));
  if (cell_type == NULL) {
    cell_type = grn_type_create(data->context,
                                VALUE_TYPE_NAME, strlen(VALUE_TYPE_NAME),
                                0, data->value_size);
  }

  GRN_CTX_SET_ENCODING(data->context, data->encoding);

  scratch_table = grn_table_create(data->context,
                                   data->name, data->name_size,
                                   NULL,
                                   data->flags & ~GRN_OBJ_PERSISTENT,
                                   data->key_type,
                                   cell_type);
  grn_obj_close(data->context, scratch_table);
}
/*
 * call-seq:
 *   Groonga::Context.new(options=nil)
 *
 * Creates a context. The values that can be specified in _options_ are
 * as follows.
 *
 * [+:encoding+]
 *   Specifies the encoding. See Groonga::Encoding for how to specify
 *   an encoding.
 */
static VALUE
rb_grn_context_initialize (int argc, VALUE *argv, VALUE self)
{
    RbGrnContext *wrapper;
    grn_ctx *ctx;
    int open_flags = 0;
    VALUE options, class_defaults;
    VALUE rb_encoding;

    rb_scan_args(argc, argv, "01", &options);

    /* Merge the caller's options over the class-level default options. */
    class_defaults = rb_grn_context_s_get_default_options(rb_obj_class(self));
    if (NIL_P(class_defaults)) {
        class_defaults = rb_hash_new();
    }
    if (NIL_P(options)) {
        options = rb_hash_new();
    }
    options = rb_funcall(class_defaults, rb_intern("merge"), 1, options);

    rb_grn_scan_options(options,
                        "encoding", &rb_encoding,
                        NULL);

    wrapper = ALLOC(RbGrnContext);
    DATA_PTR(self) = wrapper;
    wrapper->self = self;
    wrapper->context = grn_ctx_open(open_flags);
    ctx = wrapper->context;
    rb_grn_context_check(ctx, self);

    /* Let the groonga context point back at its Ruby-side wrapper. */
    GRN_CTX_USER_DATA(ctx)->ptr = wrapper;

    if (!NIL_P(rb_encoding)) {
        grn_encoding requested_encoding;

        requested_encoding = RVAL2GRNENCODING(rb_encoding, NULL);
        GRN_CTX_SET_ENCODING(ctx, requested_encoding);
    }

    debug("context new: %p\n", ctx);

    return Qnil;
}
/*
 * Data-driven test: converts the "input" datum to the "encoding" under
 * test and expects grn_charlen() to report 0 for the first "input-length"
 * bytes of it.
 */
void
test_charlen_broken(gconstpointer data)
{
  const gchar *raw_input;
  const gchar *encoded_input, *encoded_input_end;
  grn_encoding target_encoding;
  gint n_bytes;

  target_encoding = gcut_data_get_int(data, "encoding");
  GRN_CTX_SET_ENCODING(&context, target_encoding);

  raw_input = gcut_data_get_string(data, "input");
  n_bytes = gcut_data_get_int(data, "input-length");
  encoded_input = convert_encoding(raw_input, target_encoding);
  /* A negative "input-length" means: use the whole encoded string. */
  if (n_bytes < 0) {
    n_bytes = strlen(encoded_input);
  }
  encoded_input_end = encoded_input + n_bytes;

  cut_assert_equal_uint(0, grn_charlen(&context, encoded_input, encoded_input_end));
}
/*
 * Closes the database and then checks that grn_string_open() with
 * GRN_NORMALIZER_AUTO returns NULL and reports GRN_INVALID_ARGUMENT.
 */
void
test_normalize_without_database(void)
{
  const char *sample_text = "Groonga";
  const int open_flags = 0;
  grn_obj *string;

  /* Drop the database so that no normalizer can be found. */
  grn_obj_close(&context, database);
  database = NULL;

  GRN_CTX_SET_ENCODING(&context, GRN_ENC_UTF8);
  string = grn_string_open(&context,
                           sample_text, strlen(sample_text),
                           GRN_NORMALIZER_AUTO, open_flags);

  cut_assert_null(string);
  grn_test_assert_error(GRN_INVALID_ARGUMENT,
                        "[string][open] "
                        "NormalizerAuto normalizer isn't available",
                        &context);
}
/**
 * Opens a grn_string for `string` using the normalizer spec that
 * find_grn_normalizer() writes into a text bulk.
 *
 * When the spec ends with ')', it is applied by setting it as
 * GRN_INFO_NORMALIZER on `lexicon_` (an unnamed, path-less PAT-key table
 * created on first use and kept in the member) and passing that table to
 * grn_string_open(). Otherwise the spec is looked up by name with
 * grn_ctx_get() and the result is passed directly.
 *
 * The context encoding is switched to the field's charset while the
 * string is opened and restored to its previous value afterwards.
 *
 * @param string         bytes to normalize
 * @param string_length  length of `string` in bytes
 * @return the object returned by grn_string_open()
 */
grn_obj *FieldNormalizer::normalize(const char *string,
                                    unsigned int string_length) {
  MRN_DBUG_ENTER_METHOD();

  grn_obj normalizer;
  GRN_TEXT_INIT(&normalizer, 0);
  find_grn_normalizer(&normalizer);

  int flags = 0;
  // Remember the current encoding so it can be put back after the call.
  grn_encoding original_encoding = GRN_CTX_GET_ENCODING(ctx_);
  encoding::set_raw(ctx_, field_->charset());

  grn_obj *grn_string;
  // Check the length first: with an empty spec, indexing at length - 1
  // would read outside the text buffer. An empty spec takes the
  // lookup-by-name branch below.
  if (GRN_TEXT_LEN(&normalizer) > 0 &&
      GRN_TEXT_VALUE(&normalizer)[GRN_TEXT_LEN(&normalizer) - 1] == ')') {
    if (!lexicon_) {
      lexicon_ = grn_table_create(ctx_,
                                  NULL, 0,
                                  NULL,
                                  GRN_OBJ_TABLE_PAT_KEY,
                                  grn_ctx_at(ctx_, GRN_DB_SHORT_TEXT),
                                  NULL);
    }
    grn_obj_set_info(ctx_, lexicon_, GRN_INFO_NORMALIZER, &normalizer);
    grn_string = grn_string_open(ctx_,
                                 string, string_length,
                                 lexicon_,
                                 flags);
  } else {
    grn_string = grn_string_open(ctx_,
                                 string, string_length,
                                 grn_ctx_get(ctx_,
                                             GRN_TEXT_VALUE(&normalizer),
                                             GRN_TEXT_LEN(&normalizer)),
                                 flags);
  }

  GRN_OBJ_FIN(ctx_, &normalizer);
  GRN_CTX_SET_ENCODING(ctx_, original_encoding);

  DBUG_RETURN(grn_string);
}
/*
 * Data-driven test: feeds input encoded with "input-encoding" to
 * grn_str_open() while the context uses "context-encoding", and expects
 * an empty normalized result (empty text, 0 bytes).
 */
void
test_normalize_broken(gconstpointer data)
{
  const gchar *raw_input, *input_in_encoding;
  grn_str *string;
  grn_encoding source_encoding, ctx_encoding;
  gint n_input_bytes;
  int open_flags;

  open_flags = GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS | GRN_STR_WITH_CTYPES;

  ctx_encoding = gcut_data_get_int(data, "context-encoding");
  GRN_CTX_SET_ENCODING(&context, ctx_encoding);

  raw_input = gcut_data_get_string(data, "input");
  source_encoding = gcut_data_get_int(data, "input-encoding");
  n_input_bytes = gcut_data_get_int(data, "input-length");
  input_in_encoding = convert_encoding(raw_input, source_encoding);
  /* A negative "input-length" means: use the whole encoded string. */
  if (n_input_bytes < 0) {
    n_input_bytes = strlen(input_in_encoding);
  }

  string = grn_str_open(&context, input_in_encoding, n_input_bytes,
                        open_flags);
  cut_assert_equal_string("", string->norm);
  cut_assert_equal_int(0, string->norm_blen);
  grn_test_assert(grn_str_close(&context, string));
}
/*
 * Data-driven test: converts the "input" and "expected" data to the
 * "encoding" under test, normalizes the input with GRN_NORMALIZER_AUTO and
 * checks the normalized text, its byte length and its character count.
 */
void
test_normalize(gconstpointer data)
{
  const gchar *utf8_expected, *encoded_expected;
  const gchar *input_source, *input_in_encoding;
  const gchar *normalized_text;
  guint normalized_text_length;
  guint normalized_text_n_characters;
  grn_obj *grn_text;
  grn_encoding target_encoding;
  int open_flags;

  target_encoding = gcut_data_get_int(data, "encoding");
  GRN_CTX_SET_ENCODING(&context, target_encoding);

  open_flags = GRN_STRING_WITH_CHECKS | GRN_STRING_WITH_TYPES;
  input_source = gcut_data_get_string(data, "input");
  input_in_encoding = convert_encoding(input_source, target_encoding);

  grn_text = grn_string_open(&context,
                             input_in_encoding, strlen(input_in_encoding),
                             GRN_NORMALIZER_AUTO, open_flags);
  grn_string_get_normalized(&context, grn_text,
                            &normalized_text,
                            &normalized_text_length,
                            &normalized_text_n_characters);
  /* Copy the result before the string object is released. */
  normalized_text = cut_take_strndup(normalized_text, normalized_text_length);
  grn_obj_unlink(&context, grn_text);

  utf8_expected = gcut_data_get_string(data, "expected");
  encoded_expected = convert_encoding(utf8_expected, target_encoding);
  cut_assert_equal_string(encoded_expected, normalized_text);
  cut_assert_equal_uint(strlen(encoded_expected), normalized_text_length);
  cut_assert_equal_uint(g_utf8_strlen(utf8_expected, -1),
                        normalized_text_n_characters);
}
/**
 * Sets the encoding of `ctx` according to `charset`.
 *
 * `charset->cset` is compared against the known mrn_charset_* entries in a
 * fixed order and the first match selects the encoding. A NULL `charset`
 * selects GRN_ENC_NONE.
 *
 * @return 0 when `charset` is NULL or matches a known entry. Otherwise the
 *         context is set to GRN_ENC_NONE, the problem is reported with
 *         my_printf_error() and ER_MRN_CHARSET_NOT_SUPPORT_NUM is returned.
 */
int set(grn_ctx *ctx, const CHARSET_INFO *charset) {
  MRN_DBUG_ENTER_FUNCTION();

  grn_encoding selected = GRN_ENC_NONE;
  bool recognized = true;

  // The comparison order below is significant only in that the first
  // matching entry wins.
  if (!charset) {
    selected = GRN_ENC_NONE;
  } else if (charset->cset == mrn_charset_utf8->cset) {
    selected = GRN_ENC_UTF8;
  } else if (mrn_charset_utf8mb4 &&
             charset->cset == mrn_charset_utf8mb4->cset) {
    selected = GRN_ENC_UTF8;
  } else if (charset->cset == mrn_charset_cp932->cset) {
    selected = GRN_ENC_SJIS;
  } else if (charset->cset == mrn_charset_eucjpms->cset) {
    selected = GRN_ENC_EUC_JP;
  } else if (charset->cset == mrn_charset_latin1_1->cset) {
    selected = GRN_ENC_LATIN1;
  } else if (charset->cset == mrn_charset_latin1_2->cset) {
    selected = GRN_ENC_LATIN1;
  } else if (charset->cset == mrn_charset_koi8r->cset) {
    selected = GRN_ENC_KOI8R;
  } else if (charset->cset == mrn_charset_binary->cset) {
    selected = GRN_ENC_NONE;
  } else if (charset->cset == mrn_charset_ascii->cset) {
    selected = GRN_ENC_UTF8;
  } else if (charset->cset == mrn_charset_sjis->cset) {
    selected = GRN_ENC_SJIS;
  } else if (charset->cset == mrn_charset_ujis->cset) {
    selected = GRN_ENC_EUC_JP;
  } else {
    recognized = false;
  }

  GRN_CTX_SET_ENCODING(ctx, selected);

  if (recognized) {
    DBUG_RETURN(0);
  }

  my_printf_error(ER_MRN_CHARSET_NOT_SUPPORT_NUM,
                  ER_MRN_CHARSET_NOT_SUPPORT_STR,
                  MYF(0),
                  charset->name,
                  charset->csname);
  DBUG_RETURN(ER_MRN_CHARSET_NOT_SUPPORT_NUM);
}