Esempio n. 1
0
void
test_normalize(gconstpointer data)
{
  const gchar *utf8_expected, *encoded_expected;
  const gchar *utf8_input, *encoded_input;
  grn_str *string;
  const gchar *normalized_text;
  guint normalized_text_len;
  int flags;
  grn_encoding encoding;

  encoding = gcut_data_get_int(data, "encoding");
  GRN_CTX_SET_ENCODING(&context, encoding);
  flags = GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS | GRN_STR_WITH_CTYPES;
  utf8_input = gcut_data_get_string(data, "input");
  encoded_input = convert_encoding(utf8_input, encoding);
  string = grn_str_open(&context, encoded_input, strlen(encoded_input), flags);
  normalized_text = cut_take_strndup(string->norm, string->norm_blen);
  normalized_text_len = string->norm_blen;
  grn_test_assert(grn_str_close(&context, string));

  utf8_expected = gcut_data_get_string(data, "expected");
  encoded_expected = convert_encoding(utf8_expected, encoding);
  cut_assert_equal_string(encoded_expected, normalized_text);
  cut_assert_equal_int(strlen(encoded_expected), normalized_text_len);
}
Esempio n. 2
0
void
test_remove_tokenized_delimiter(gconstpointer data)
{
  grn_obj *string;
  grn_obj *normalizer = NULL;
  const gchar *expected;
  const gchar *input;
  const gchar *normalized;
  unsigned int length_in_bytes;
  int flags = GRN_STRING_REMOVE_TOKENIZED_DELIMITER;

  GRN_CTX_SET_ENCODING(&context, GRN_ENC_UTF8);

  input = gcut_data_get_string(data, "input");
  flags |= gcut_data_get_int(data, "flags");
  if (flags & GRN_OBJ_KEY_NORMALIZE) {
    normalizer = GRN_NORMALIZER_AUTO;
  }

  string = grn_string_open(&context, input, strlen(input), normalizer, flags);
  grn_string_get_normalized(&context, string,
                            &normalized, &length_in_bytes, NULL);
  normalized = cut_take_strndup(normalized, length_in_bytes);
  grn_obj_unlink(&context, string);

  expected = gcut_data_get_string(data, "expected");
  cut_assert_equal_string(expected, normalized);
}
Esempio n. 3
0
void
test_normalize_broken(gconstpointer data)
{
  grn_obj *string;
  const gchar *input, *encoded_input;
  const gchar *normalized_text;
  grn_encoding input_encoding, context_encoding;
  gint input_length;
  guint normalized_text_length, normalized_text_n_characters;
  int flags = GRN_STRING_WITH_CHECKS | GRN_STRING_WITH_TYPES;

  context_encoding = gcut_data_get_int(data, "context-encoding");
  GRN_CTX_SET_ENCODING(&context, context_encoding);

  input = gcut_data_get_string(data, "input");
  input_encoding = gcut_data_get_int(data, "input-encoding");
  input_length = gcut_data_get_int(data, "input-length");
  encoded_input = convert_encoding(input, input_encoding);
  if (input_length < 0) {
    input_length = strlen(encoded_input);
  }
  string = grn_string_open(&context, encoded_input, input_length,
                           GRN_NORMALIZER_AUTO, flags);
  grn_string_get_normalized(&context, string,
                            &normalized_text,
                            &normalized_text_length,
                            &normalized_text_n_characters);
  normalized_text = cut_take_strndup(normalized_text, normalized_text_length);
  grn_obj_unlink(&context, string);

  cut_assert_equal_string("", normalized_text);
  cut_assert_equal_int(0, normalized_text_length);
  cut_assert_equal_int(0, normalized_text_n_characters);
}
Esempio n. 4
0
void
test_normalize(gconstpointer data)
{
  const gchar *utf8_expected, *encoded_expected;
  const gchar *utf8_input, *encoded_input;
  grn_obj *string;
  const gchar *normalized_text;
  guint normalized_text_length;
  guint normalized_text_n_characters;
  int flags;
  grn_encoding encoding;

  encoding = gcut_data_get_int(data, "encoding");
  GRN_CTX_SET_ENCODING(&context, encoding);
  flags = GRN_STRING_WITH_CHECKS | GRN_STRING_WITH_TYPES;
  utf8_input = gcut_data_get_string(data, "input");
  encoded_input = convert_encoding(utf8_input, encoding);
  string = grn_string_open(&context,
                           encoded_input,
                           strlen(encoded_input),
                           GRN_NORMALIZER_AUTO,
                           flags);
  grn_string_get_normalized(&context, string,
                            &normalized_text,
                            &normalized_text_length,
                            &normalized_text_n_characters);
  normalized_text = cut_take_strndup(normalized_text, normalized_text_length);
  grn_obj_unlink(&context, string);

  utf8_expected = gcut_data_get_string(data, "expected");
  encoded_expected = convert_encoding(utf8_expected, encoding);
  cut_assert_equal_string(encoded_expected, normalized_text);
  cut_assert_equal_uint(strlen(encoded_expected), normalized_text_length);
  cut_assert_equal_uint(g_utf8_strlen(utf8_expected, -1),
                        normalized_text_n_characters);
}
void
test_lcp_search(gconstpointer data)
{
  const grn_trie_test_data *test_data = data;
  gchar key[GRN_PAT_MAX_KEY_SIZE];
  const gchar key1[] = "セナ";
  const gchar key2[] = "ナセナセ";
  const gchar key3[] = "Groonga";
  const gchar key4[] = "セナ + Ruby";
  const gchar key5[] = "セナセナ";

  trie_test_data_set_parameters(test_data);

  cut_assert_create_trie();

  cut_assert_lookup_add(key1);
  cut_assert_lookup_add(key2);
  cut_assert_lookup_add(key3);
  cut_assert_lookup_add(key4);
  cut_assert_lookup_add(key5);

  id = grn_pat_lcp_search(context, trie,
                          test_data->search_key,
                          strlen(test_data->search_key));
  if (test_data->expected_key) {
    int size;
    const gchar *null_terminated_key;

    grn_test_assert_not_nil(id);
    size = grn_pat_get_key(context, trie, id, key, sizeof(key));
    null_terminated_key = cut_take_strndup(key, size);
    cut_assert_equal_string(test_data->expected_key, null_terminated_key);
  } else {
    grn_test_assert_nil(id);
  }
}
Esempio n. 6
0
void
test_setoperation(gconstpointer data)
{
  grn_operator operator;
  grn_obj *entries;
  grn_obj *result1;
  grn_obj *result2;
  const char *dump;

  operator = gcut_data_get_int(data, "operator");

  assert_send_command("table_create Entries TABLE_HASH_KEY ShortText");
  send_command(
    "load "
    "--table Entries "
    "--values '[{\"_key\": \"a\"}, {\"_key\": \"b\"}, {\"_key\": \"c\"}]'");

  entries = grn_ctx_get(context, "Entries", -1);
  {
    const char *condition = "_id < 3";
    grn_obj *expr;
    grn_obj *variable;

    GRN_EXPR_CREATE_FOR_QUERY(context, entries, expr, variable);
    grn_expr_parse(context, expr,
                   condition, strlen(condition),
                   NULL, GRN_OP_AND, GRN_OP_MATCH, GRN_EXPR_SYNTAX_SCRIPT);
    result1 = grn_table_select(context, entries, expr, NULL, GRN_OP_OR);
    grn_obj_unlink(context, expr);
  }
  {
    const char *condition = "_id > 1";
    grn_obj *expr;
    grn_obj *variable;

    GRN_EXPR_CREATE_FOR_QUERY(context, entries, expr, variable);
    grn_expr_parse(context, expr,
                   condition, strlen(condition),
                   NULL, GRN_OP_AND, GRN_OP_MATCH, GRN_EXPR_SYNTAX_SCRIPT);
    result2 = grn_table_select(context, entries, expr, NULL, GRN_OP_OR);
    grn_obj_unlink(context, expr);
  }

  grn_table_setoperation(context, result1, result2, result1, operator);

  {
    grn_bool first_record = GRN_TRUE;
    grn_obj buffer;
    grn_obj *score_accessor;
    grn_obj score;

    GRN_TEXT_INIT(&buffer, 0);
    GRN_TEXT_PUTS(context, &buffer, "[");
    score_accessor = grn_obj_column(context, result1,
                                    GRN_COLUMN_NAME_SCORE,
                                    GRN_COLUMN_NAME_SCORE_LEN);
    GRN_FLOAT_INIT(&score, 0);
    GRN_TABLE_EACH_BEGIN(context, result1, cursor, id) {
      void *result_key;
      grn_id entry_id;
      char entry_key[GRN_TABLE_MAX_KEY_SIZE];
      int entry_key_size;

      if (first_record) {
        first_record = GRN_FALSE;
      } else {
        GRN_TEXT_PUTS(context, &buffer, ", ");
      }

      GRN_TEXT_PUTS(context, &buffer, "[");

      grn_table_cursor_get_key(context, cursor, &result_key);
      entry_id = *((grn_id *)result_key);
      entry_key_size = grn_table_get_key(context,
                                         entries,
                                         entry_id,
                                         entry_key,
                                         GRN_TABLE_MAX_KEY_SIZE);
      GRN_TEXT_PUT(context, &buffer, entry_key, entry_key_size);

      GRN_TEXT_PUTS(context, &buffer, ", ");

      GRN_BULK_REWIND(&score);
      grn_obj_get_value(context, score_accessor, id, &score);
      grn_text_printf(context, &buffer, "%.1f", GRN_FLOAT_VALUE(&score));

      GRN_TEXT_PUTS(context, &buffer, "]");
    } GRN_TABLE_EACH_END(context, cursor);
    GRN_OBJ_FIN(context, &score);
    grn_obj_unlink(context, score_accessor);
    GRN_TEXT_PUTS(context, &buffer, "]");

    dump = cut_take_strndup(GRN_TEXT_VALUE(&buffer), GRN_TEXT_LEN(&buffer));
    GRN_OBJ_FIN(context, &buffer);
  }