Example #1
A Cutter unit test: it looks up an object by name and asserts that grn_obj_is_tokenizer_proc() reports the expected result.
void
test_is_tokenizer_proc(gconstpointer data)
{
  const gchar *name;
  grn_obj *object;

  /* Look up the object registered under the given name. */
  name = gcut_data_get_string(data, "name");
  object = grn_ctx_get(context, name, strlen(name));
  /* "expected" is a boolean datum, so read it with the boolean
     accessor; fetching it as a string would be truthy even for
     FALSE. */
  if (gcut_data_get_boolean(data, "expected")) {
    cut_assert_true(grn_obj_is_tokenizer_proc(context, object));
  } else {
    cut_assert_false(grn_obj_is_tokenizer_proc(context, object));
  }
}
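
The test above is data-driven. Here is a minimal sketch of a matching Cutter data provider, assuming the conventional data_*/test_* pairing; the fixture values (the built-in TokenBigram tokenizer as a positive case, the ShortText type as a negative one) are illustrative assumptions, not taken from the original suite.

#include <gcutter.h>
#include <groonga.h>

void
data_is_tokenizer_proc(void)
{
  /* A built-in tokenizer should be reported as a tokenizer proc. */
  gcut_add_datum("built-in tokenizer",
                 "name", G_TYPE_STRING, "TokenBigram",
                 "expected", G_TYPE_BOOLEAN, TRUE,
                 NULL);
  /* A plain type object should not. */
  gcut_add_datum("non-tokenizer object",
                 "name", G_TYPE_STRING, "ShortText",
                 "expected", G_TYPE_BOOLEAN, FALSE,
                 NULL);
}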
Example #2
From the tokenize command: builds a temporary lexicon table, validating the requested tokenizer with grn_obj_is_tokenizer_proc() and the optional normalizer with grn_obj_is_normalizer_proc() before attaching them.
static grn_obj *
create_lexicon_for_tokenize(grn_ctx *ctx,
                            grn_obj *tokenizer_name,
                            grn_obj *normalizer_name,
                            grn_obj *token_filter_names)
{
  grn_obj *lexicon;
  grn_obj *tokenizer;
  grn_obj *normalizer = NULL;

  /* Resolve the tokenizer by name; grn_ctx_get() returns NULL when no
     object with that name exists. */
  tokenizer = grn_ctx_get(ctx,
                          GRN_TEXT_VALUE(tokenizer_name),
                          GRN_TEXT_LEN(tokenizer_name));
  if (!tokenizer) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "[tokenize] nonexistent tokenizer: <%.*s>",
                     (int)GRN_TEXT_LEN(tokenizer_name),
                     GRN_TEXT_VALUE(tokenizer_name));
    return NULL;
  }

  /* The name resolved to an object, but it must actually be a
     tokenizer proc. */
  if (!grn_obj_is_tokenizer_proc(ctx, tokenizer)) {
    grn_obj inspected;
    GRN_TEXT_INIT(&inspected, 0);
    grn_inspect(ctx, &inspected, tokenizer);
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "[tokenize] not tokenizer: %.*s",
                     (int)GRN_TEXT_LEN(&inspected),
                     GRN_TEXT_VALUE(&inspected));
    GRN_OBJ_FIN(ctx, &inspected);
    grn_obj_unlink(ctx, tokenizer);
    return NULL;
  }

  /* The normalizer is optional: an empty name means no normalizer. */
  if (GRN_TEXT_LEN(normalizer_name) > 0) {
    normalizer = grn_ctx_get(ctx,
                             GRN_TEXT_VALUE(normalizer_name),
                             GRN_TEXT_LEN(normalizer_name));
    if (!normalizer) {
      grn_obj_unlink(ctx, tokenizer);
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "[tokenize] nonexistent normalizer: <%.*s>",
                       (int)GRN_TEXT_LEN(normalizer_name),
                       GRN_TEXT_VALUE(normalizer_name));
      return NULL;
    }

    if (!grn_obj_is_normalizer_proc(ctx, normalizer)) {
      grn_obj inspected;
      grn_obj_unlink(ctx, tokenizer);
      GRN_TEXT_INIT(&inspected, 0);
      grn_inspect(ctx, &inspected, normalizer);
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "[tokenize] not normalizer: %.*s",
                       (int)GRN_TEXT_LEN(&inspected),
                       GRN_TEXT_VALUE(&inspected));
      GRN_OBJ_FIN(ctx, &inspected);
      grn_obj_unlink(ctx, normalizer);
      return NULL;
    }
  }

  /* Create an anonymous hash table keyed by ShortText to serve as the
     lexicon. */
  lexicon = grn_table_create(ctx, NULL, 0,
                             NULL,
                             GRN_OBJ_TABLE_HASH_KEY,
                             grn_ctx_at(ctx, GRN_DB_SHORT_TEXT),
                             NULL);
  if (!lexicon) {
    grn_obj_unlink(ctx, tokenizer);
    if (normalizer) {
      grn_obj_unlink(ctx, normalizer);
    }
    return NULL;
  }
  grn_obj_set_info(ctx, lexicon,
                   GRN_INFO_DEFAULT_TOKENIZER, tokenizer);
  grn_obj_unlink(ctx, tokenizer);
  if (normalizer) {
    grn_obj_set_info(ctx, lexicon,
                     GRN_INFO_NORMALIZER, normalizer);
    grn_obj_unlink(ctx, normalizer);
  }
  /* Attach any requested token filters to the lexicon. */
  grn_proc_table_set_token_filters(ctx, lexicon, token_filter_names);

  return lexicon;
}
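
Once create_lexicon_for_tokenize() succeeds, the lexicon can drive tokenization. The following is a hedged sketch using Groonga's public grn_token_cursor API; tokenize_with_lexicon() is an illustrative helper name, not part of the original source.

#include <stdio.h>
#include <groonga.h>

static void
tokenize_with_lexicon(grn_ctx *ctx, grn_obj *lexicon,
                      const char *string, size_t length)
{
  grn_token_cursor *cursor;
  grn_id token_id;

  /* GRN_TOKENIZE_ADD registers unseen tokens in the lexicon, so the
     cursor keeps yielding valid token IDs until the input is
     exhausted. */
  cursor = grn_token_cursor_open(ctx, lexicon, string, length,
                                 GRN_TOKENIZE_ADD, 0);
  if (!cursor) {
    return;
  }
  while ((token_id = grn_token_cursor_next(ctx, cursor)) != GRN_ID_NIL) {
    char key[GRN_TABLE_MAX_KEY_SIZE];
    int key_size;

    /* Map each token ID back to its surface form. */
    key_size = grn_table_get_key(ctx, lexicon, token_id,
                                 key, GRN_TABLE_MAX_KEY_SIZE);
    printf("token: <%.*s>\n", key_size, key);
  }
  grn_token_cursor_close(ctx, cursor);
}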