/*
 * Data-driven test for normalization through the grn_str API.
 *
 * The test data supplies "encoding", "input" and "expected".  The UTF-8
 * input is converted to the requested encoding, opened with
 * grn_str_open() using the NORMALIZE, WITH_CHECKS and WITH_CTYPES flags,
 * and the normalized bytes and their byte length are compared with the
 * expected text converted to the same encoding.
 */
void
test_normalize(gconstpointer data)
{
  const int open_flags =
    GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS | GRN_STR_WITH_CTYPES;
  grn_encoding target_encoding = gcut_data_get_int(data, "encoding");
  const gchar *source_utf8;
  const gchar *source_encoded;
  const gchar *utf8_expected;
  const gchar *encoded_expected;
  const gchar *normalized_text;
  guint normalized_text_len;
  grn_str *string;

  GRN_CTX_SET_ENCODING(&context, target_encoding);

  source_utf8 = gcut_data_get_string(data, "input");
  source_encoded = convert_encoding(source_utf8, target_encoding);
  string = grn_str_open(&context,
                        source_encoded,
                        strlen(source_encoded),
                        open_flags);

  /* Snapshot the result before the string object is closed. */
  normalized_text_len = string->norm_blen;
  normalized_text = cut_take_strndup(string->norm, string->norm_blen);
  grn_test_assert(grn_str_close(&context, string));

  utf8_expected = gcut_data_get_string(data, "expected");
  encoded_expected = convert_encoding(utf8_expected, target_encoding);

  cut_assert_equal_string(encoded_expected, normalized_text);
  cut_assert_equal_int(strlen(encoded_expected), normalized_text_len);
}
/*
 * Data-driven test for GRN_STRING_REMOVE_TOKENIZED_DELIMITER.
 *
 * The context encoding is fixed to UTF-8.  The test data supplies "input",
 * extra "flags" that are OR-ed into the open flags, and "expected".  When
 * the combined flags contain GRN_OBJ_KEY_NORMALIZE the string is opened
 * with GRN_NORMALIZER_AUTO; otherwise no normalizer is passed.
 */
void
test_remove_tokenized_delimiter(gconstpointer data)
{
  grn_obj *string;
  grn_obj *normalizer;
  const gchar *source;
  const gchar *raw_normalized;
  const gchar *normalized;
  const gchar *expected;
  unsigned int raw_normalized_bytes;
  int open_flags;

  GRN_CTX_SET_ENCODING(&context, GRN_ENC_UTF8);

  source = gcut_data_get_string(data, "input");
  open_flags =
    GRN_STRING_REMOVE_TOKENIZED_DELIMITER | gcut_data_get_int(data, "flags");
  normalizer =
    (open_flags & GRN_OBJ_KEY_NORMALIZE) ? GRN_NORMALIZER_AUTO : NULL;

  string = grn_string_open(&context,
                           source,
                           strlen(source),
                           normalizer,
                           open_flags);
  grn_string_get_normalized(&context,
                            string,
                            &raw_normalized,
                            &raw_normalized_bytes,
                            NULL);

  /* Take a test-owned copy before the string object is unlinked. */
  normalized = cut_take_strndup(raw_normalized, raw_normalized_bytes);
  grn_obj_unlink(&context, string);

  expected = gcut_data_get_string(data, "expected");
  cut_assert_equal_string(expected, normalized);
}
/*
 * Data-driven test asserting that normalization yields an empty result.
 *
 * The test data supplies "context-encoding" (set on the context),
 * "input", "input-encoding" (used to convert the input) and
 * "input-length".  A negative "input-length" means "use the full byte
 * length of the converted input".  After normalizing with
 * GRN_NORMALIZER_AUTO the test expects an empty string, zero bytes and
 * zero characters.
 */
void
test_normalize_broken(gconstpointer data)
{
  const int open_flags = GRN_STRING_WITH_CHECKS | GRN_STRING_WITH_TYPES;
  grn_encoding ctx_encoding = gcut_data_get_int(data, "context-encoding");
  grn_encoding source_encoding;
  const gchar *source;
  const gchar *source_encoded;
  const gchar *normalized_text;
  gint source_bytes;
  guint normalized_text_length;
  guint normalized_text_n_characters;
  grn_obj *string;

  GRN_CTX_SET_ENCODING(&context, ctx_encoding);

  source = gcut_data_get_string(data, "input");
  source_encoding = gcut_data_get_int(data, "input-encoding");
  source_bytes = gcut_data_get_int(data, "input-length");
  source_encoded = convert_encoding(source, source_encoding);
  if (source_bytes < 0) {
    /* No explicit length given: pass the whole converted input. */
    source_bytes = strlen(source_encoded);
  }

  string = grn_string_open(&context,
                           source_encoded,
                           source_bytes,
                           GRN_NORMALIZER_AUTO,
                           open_flags);
  grn_string_get_normalized(&context,
                            string,
                            &normalized_text,
                            &normalized_text_length,
                            &normalized_text_n_characters);

  /* Take a test-owned copy before the string object is unlinked. */
  normalized_text = cut_take_strndup(normalized_text, normalized_text_length);
  grn_obj_unlink(&context, string);

  cut_assert_equal_string("", normalized_text);
  cut_assert_equal_int(0, normalized_text_length);
  cut_assert_equal_int(0, normalized_text_n_characters);
}
/*
 * Data-driven test for normalization through the grn_string API.
 *
 * The test data supplies "encoding", "input" and "expected".  The UTF-8
 * input is converted to the requested encoding and opened with
 * GRN_NORMALIZER_AUTO.  The normalized bytes, their byte length and the
 * reported character count are then compared with the expected text:
 * bytes and byte length against its converted form, character count
 * against g_utf8_strlen() of its UTF-8 form.
 */
void
test_normalize(gconstpointer data)
{
  const int open_flags = GRN_STRING_WITH_CHECKS | GRN_STRING_WITH_TYPES;
  grn_encoding target_encoding = gcut_data_get_int(data, "encoding");
  const gchar *source_utf8;
  const gchar *source_encoded;
  const gchar *utf8_expected;
  const gchar *encoded_expected;
  const gchar *normalized_text;
  guint normalized_text_length;
  guint normalized_text_n_characters;
  grn_obj *string;

  GRN_CTX_SET_ENCODING(&context, target_encoding);

  source_utf8 = gcut_data_get_string(data, "input");
  source_encoded = convert_encoding(source_utf8, target_encoding);
  string = grn_string_open(&context,
                           source_encoded,
                           strlen(source_encoded),
                           GRN_NORMALIZER_AUTO,
                           open_flags);
  grn_string_get_normalized(&context,
                            string,
                            &normalized_text,
                            &normalized_text_length,
                            &normalized_text_n_characters);

  /* Take a test-owned copy before the string object is unlinked. */
  normalized_text = cut_take_strndup(normalized_text, normalized_text_length);
  grn_obj_unlink(&context, string);

  utf8_expected = gcut_data_get_string(data, "expected");
  encoded_expected = convert_encoding(utf8_expected, target_encoding);

  cut_assert_equal_string(encoded_expected, normalized_text);
  cut_assert_equal_uint(strlen(encoded_expected), normalized_text_length);
  cut_assert_equal_uint(g_utf8_strlen(utf8_expected, -1),
                        normalized_text_n_characters);
}
/*
 * Data-driven test for grn_pat_lcp_search().
 *
 * Creates a trie configured from the test data, registers five fixed keys
 * and runs the search with test_data->search_key.  When the test data
 * carries an expected key, the returned id must be non-nil and the key
 * stored under it must equal the expected key; otherwise the returned id
 * must be nil.
 */
void
test_lcp_search(gconstpointer data)
{
  const grn_trie_test_data *test_data = data;
  gchar found_key[GRN_PAT_MAX_KEY_SIZE];
  const gchar key1[] = "セナ";
  const gchar key2[] = "ナセナセ";
  const gchar key3[] = "Groonga";
  const gchar key4[] = "セナ + Ruby";
  const gchar key5[] = "セナセナ";
  const gchar *null_terminated_key;
  int found_key_size;

  trie_test_data_set_parameters(test_data);

  cut_assert_create_trie();

  cut_assert_lookup_add(key1);
  cut_assert_lookup_add(key2);
  cut_assert_lookup_add(key3);
  cut_assert_lookup_add(key4);
  cut_assert_lookup_add(key5);

  id = grn_pat_lcp_search(context,
                          trie,
                          test_data->search_key,
                          strlen(test_data->search_key));

  if (!test_data->expected_key) {
    /* No match is expected for this search key. */
    grn_test_assert_nil(id);
    return;
  }

  grn_test_assert_not_nil(id);
  found_key_size = grn_pat_get_key(context,
                                   trie,
                                   id,
                                   found_key,
                                   sizeof(found_key));
  /* Build a NUL-terminated copy of exactly the returned number of bytes. */
  null_terminated_key = cut_take_strndup(found_key, found_key_size);
  cut_assert_equal_string(test_data->expected_key, null_terminated_key);
}
void test_setoperation(gconstpointer data) { grn_operator operator; grn_obj *entries; grn_obj *result1; grn_obj *result2; const char *dump; operator = gcut_data_get_int(data, "operator"); assert_send_command("table_create Entries TABLE_HASH_KEY ShortText"); send_command( "load " "--table Entries " "--values '[{\"_key\": \"a\"}, {\"_key\": \"b\"}, {\"_key\": \"c\"}]'"); entries = grn_ctx_get(context, "Entries", -1); { const char *condition = "_id < 3"; grn_obj *expr; grn_obj *variable; GRN_EXPR_CREATE_FOR_QUERY(context, entries, expr, variable); grn_expr_parse(context, expr, condition, strlen(condition), NULL, GRN_OP_AND, GRN_OP_MATCH, GRN_EXPR_SYNTAX_SCRIPT); result1 = grn_table_select(context, entries, expr, NULL, GRN_OP_OR); grn_obj_unlink(context, expr); } { const char *condition = "_id > 1"; grn_obj *expr; grn_obj *variable; GRN_EXPR_CREATE_FOR_QUERY(context, entries, expr, variable); grn_expr_parse(context, expr, condition, strlen(condition), NULL, GRN_OP_AND, GRN_OP_MATCH, GRN_EXPR_SYNTAX_SCRIPT); result2 = grn_table_select(context, entries, expr, NULL, GRN_OP_OR); grn_obj_unlink(context, expr); } grn_table_setoperation(context, result1, result2, result1, operator); { grn_bool first_record = GRN_TRUE; grn_obj buffer; grn_obj *score_accessor; grn_obj score; GRN_TEXT_INIT(&buffer, 0); GRN_TEXT_PUTS(context, &buffer, "["); score_accessor = grn_obj_column(context, result1, GRN_COLUMN_NAME_SCORE, GRN_COLUMN_NAME_SCORE_LEN); GRN_FLOAT_INIT(&score, 0); GRN_TABLE_EACH_BEGIN(context, result1, cursor, id) { void *result_key; grn_id entry_id; char entry_key[GRN_TABLE_MAX_KEY_SIZE]; int entry_key_size; if (first_record) { first_record = GRN_FALSE; } else { GRN_TEXT_PUTS(context, &buffer, ", "); } GRN_TEXT_PUTS(context, &buffer, "["); grn_table_cursor_get_key(context, cursor, &result_key); entry_id = *((grn_id *)result_key); entry_key_size = grn_table_get_key(context, entries, entry_id, entry_key, GRN_TABLE_MAX_KEY_SIZE); GRN_TEXT_PUT(context, &buffer, entry_key, 
entry_key_size); GRN_TEXT_PUTS(context, &buffer, ", "); GRN_BULK_REWIND(&score); grn_obj_get_value(context, score_accessor, id, &score); grn_text_printf(context, &buffer, "%.1f", GRN_FLOAT_VALUE(&score)); GRN_TEXT_PUTS(context, &buffer, "]"); } GRN_TABLE_EACH_END(context, cursor); GRN_OBJ_FIN(context, &score); grn_obj_unlink(context, score_accessor); GRN_TEXT_PUTS(context, &buffer, "]"); dump = cut_take_strndup(GRN_TEXT_VALUE(&buffer), GRN_TEXT_LEN(&buffer)); GRN_OBJ_FIN(context, &buffer); }