コード例 #1
0
ファイル: proc_tokenize.c プロジェクト: XLPE/groonga
static grn_obj *
command_tokenize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  /* `tokenize` command handler: tokenizes the `string` argument with the
   * named tokenizer (optionally with a normalizer and token filters) and
   * outputs the resulting tokens.  Always returns NULL; failures are
   * reported through ctx via GRN_PLUGIN_ERROR. */
  grn_obj *tokenizer_name =
    grn_plugin_proc_get_var(ctx, user_data, "tokenizer", -1);
  grn_obj *string =
    grn_plugin_proc_get_var(ctx, user_data, "string", -1);
  grn_obj *normalizer_name =
    grn_plugin_proc_get_var(ctx, user_data, "normalizer", -1);
  grn_obj *flag_names =
    grn_plugin_proc_get_var(ctx, user_data, "flags", -1);
  grn_obj *mode_name =
    grn_plugin_proc_get_var(ctx, user_data, "mode", -1);
  grn_obj *token_filter_names =
    grn_plugin_proc_get_var(ctx, user_data, "token_filters", -1);

  /* `tokenizer` and `string` are mandatory. */
  if (GRN_TEXT_LEN(tokenizer_name) == 0) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "[tokenize] tokenizer name is missing");
    return NULL;
  }
  if (GRN_TEXT_LEN(string) == 0) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "[tokenize] string is missing");
    return NULL;
  }

  {
    unsigned int flags = parse_tokenize_flags(ctx, flag_names);
    grn_obj *lexicon;

    if (ctx->rc != GRN_SUCCESS) {
      return NULL;
    }

    /* Build a temporary lexicon configured with the requested tokenizer,
     * normalizer and token filters. */
    lexicon = create_lexicon_for_tokenize(ctx,
                                          tokenizer_name,
                                          normalizer_name,
                                          token_filter_names);
    if (!lexicon) {
      return NULL;
    }

    {
      grn_obj tokens;
      const char *mode = GRN_TEXT_VALUE(mode_name);
      size_t mode_length = GRN_TEXT_LEN(mode_name);

      GRN_VALUE_FIX_SIZE_INIT(&tokens, GRN_OBJ_VECTOR, GRN_ID_NIL);
      if (mode_length == 0 ||
          (mode_length == 3 && memcmp(mode, "ADD", 3) == 0)) {
        /* Default mode: register tokens into the lexicon while tokenizing. */
        tokenize(ctx, lexicon, string, GRN_TOKEN_ADD, flags, &tokens);
        output_tokens(ctx, &tokens, lexicon, NULL);
      } else if (mode_length == 3 && memcmp(mode, "GET", 3) == 0) {
        /* GET mode: first register tokens (ADD), then re-tokenize in GET
         * mode so the output reflects lookups against the populated
         * lexicon. */
        tokenize(ctx, lexicon, string, GRN_TOKEN_ADD, flags, &tokens);
        GRN_BULK_REWIND(&tokens);
        tokenize(ctx, lexicon, string, GRN_TOKEN_GET, flags, &tokens);
        output_tokens(ctx, &tokens, lexicon, NULL);
      } else {
        GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                         "[tokenize] invalid mode: <%.*s>",
                         (int)GRN_TEXT_LEN(mode_name), GRN_TEXT_VALUE(mode_name));
      }
      GRN_OBJ_FIN(ctx, &tokens);
    }

    grn_obj_unlink(ctx, lexicon);
  }

  return NULL;
}
コード例 #2
0
ファイル: proc_tokenize.c プロジェクト: cosmo0920/groonga
static grn_obj *
command_tokenize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  /* `tokenize` command handler (grn_raw_string based variant): tokenizes
   * the `string` argument with the named tokenizer (optionally with a
   * normalizer and token filters) and outputs the resulting tokens.
   * Always returns NULL; failures are reported through ctx via
   * GRN_PLUGIN_ERROR. */
  grn_raw_string tokenizer_raw;
  grn_raw_string string_raw;
  grn_raw_string normalizer_raw;
  grn_raw_string flags_raw;
  grn_raw_string mode_raw;
  grn_raw_string token_filters_raw;

/* Fetch the command argument named `name` into `name##_raw`. */
#define GET_VALUE(name)                                         \
  name ## _raw.value =                                          \
    grn_plugin_proc_get_var_string(ctx,                         \
                                   user_data,                   \
                                   #name,                       \
                                   strlen(#name),               \
                                   &(name ## _raw.length))

  GET_VALUE(tokenizer);
  GET_VALUE(string);
  GET_VALUE(normalizer);
  GET_VALUE(flags);
  GET_VALUE(mode);
  GET_VALUE(token_filters);

#undef GET_VALUE

  /* `tokenizer` and `string` are mandatory. */
  if (tokenizer_raw.length == 0) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "[tokenize] tokenizer name is missing");
    return NULL;
  }

  if (string_raw.length == 0) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "[tokenize] string is missing");
    return NULL;
  }

  {
    unsigned int flags;
    grn_obj *lexicon;

    flags = parse_tokenize_flags(ctx, &flags_raw);
    if (ctx->rc != GRN_SUCCESS) {
      return NULL;
    }

    /* Build a temporary lexicon configured with the requested tokenizer,
     * normalizer and token filters. */
    lexicon = create_lexicon_for_tokenize(ctx,
                                          &tokenizer_raw,
                                          &normalizer_raw,
                                          &token_filters_raw);
    if (!lexicon) {
      return NULL;
    }

    {
      grn_obj tokens;
      GRN_VALUE_FIX_SIZE_INIT(&tokens, GRN_OBJ_VECTOR, GRN_ID_NIL);
      if (mode_raw.length == 0 ||
          GRN_RAW_STRING_EQUAL_CSTRING(mode_raw, "ADD")) {
        /* Default mode: register tokens into the lexicon while tokenizing. */
        tokenize(ctx, lexicon, &string_raw, GRN_TOKEN_ADD, flags, &tokens);
        output_tokens(ctx, &tokens, lexicon, NULL);
      } else if (GRN_RAW_STRING_EQUAL_CSTRING(mode_raw, "GET")) {
        /* GET mode: first register tokens (ADD), then re-tokenize in GET
         * mode so the output reflects lookups against the populated
         * lexicon. */
        tokenize(ctx, lexicon, &string_raw, GRN_TOKEN_ADD, flags, &tokens);
        GRN_BULK_REWIND(&tokens);
        tokenize(ctx, lexicon, &string_raw, GRN_TOKEN_GET, flags, &tokens);
        output_tokens(ctx, &tokens, lexicon, NULL);
      } else {
        GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                         "[tokenize] invalid mode: <%.*s>",
                         (int)mode_raw.length,
                         mode_raw.value);
      }
      GRN_OBJ_FIN(ctx, &tokens);
    }
    /* NOTE: removed a stray `#undef MODE_NAME_EQUAL` left over from an
     * older macro-based implementation; no such macro is defined here. */

    grn_obj_unlink(ctx, lexicon);
  }

  return NULL;
}
コード例 #3
0
ファイル: proc_tokenize.c プロジェクト: XLPE/groonga
static grn_obj *
command_table_tokenize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  /* `table_tokenize` command handler: tokenizes the `string` argument
   * against an existing lexicon table, optionally reporting index-column
   * statistics for each token.  Always returns NULL; failures are
   * reported through ctx via GRN_PLUGIN_ERROR. */
  grn_obj *table_name;
  grn_obj *string;
  grn_obj *flag_names;
  grn_obj *mode_name;
  grn_obj *index_column_name;

  table_name = grn_plugin_proc_get_var(ctx, user_data, "table", -1);
  string = grn_plugin_proc_get_var(ctx, user_data, "string", -1);
  flag_names = grn_plugin_proc_get_var(ctx, user_data, "flags", -1);
  mode_name = grn_plugin_proc_get_var(ctx, user_data, "mode", -1);
  index_column_name = grn_plugin_proc_get_var(ctx, user_data, "index_column", -1);

  /* `table` and `string` are mandatory. */
  if (GRN_TEXT_LEN(table_name) == 0) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "[table_tokenize] table name is missing");
    return NULL;
  }

  if (GRN_TEXT_LEN(string) == 0) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "[table_tokenize] string is missing");
    return NULL;
  }

  {
    unsigned int flags;
    grn_obj *lexicon;
    grn_obj *index_column = NULL;

    flags = parse_tokenize_flags(ctx, flag_names);
    if (ctx->rc != GRN_SUCCESS) {
      return NULL;
    }

    lexicon = grn_ctx_get(ctx, GRN_TEXT_VALUE(table_name), GRN_TEXT_LEN(table_name));

    if (!lexicon) {
      /* Fix: report the failure instead of returning silently, consistent
       * with every other error path in this function. */
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "[table_tokenize] nonexistent lexicon: <%.*s>",
                       (int)GRN_TEXT_LEN(table_name),
                       GRN_TEXT_VALUE(table_name));
      return NULL;
    }

/* True when the `mode` argument equals the C-string literal `name`. */
#define MODE_NAME_EQUAL(name)\
    (GRN_TEXT_LEN(mode_name) == strlen(name) &&\
     memcmp(GRN_TEXT_VALUE(mode_name), name, strlen(name)) == 0)

    if (GRN_TEXT_LEN(index_column_name) > 0) {
      /* Optional `index_column` must exist on the lexicon and be an
       * index column. */
      index_column = grn_obj_column(ctx, lexicon,
                                    GRN_TEXT_VALUE(index_column_name),
                                    GRN_TEXT_LEN(index_column_name));
      if (!index_column) {
        GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                         "[table_tokenize] nonexistent index column: <%.*s>",
                         (int)GRN_TEXT_LEN(index_column_name),
                         GRN_TEXT_VALUE(index_column_name));
        goto exit;
      }
      if (index_column->header.type != GRN_COLUMN_INDEX) {
        GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                         "[table_tokenize] index column must be COLUMN_INDEX: <%.*s>",
                         (int)GRN_TEXT_LEN(index_column_name),
                         GRN_TEXT_VALUE(index_column_name));
        goto exit;
      }
    }

    {
      grn_obj tokens;
      GRN_VALUE_FIX_SIZE_INIT(&tokens, GRN_OBJ_VECTOR, GRN_ID_NIL);
      /* Default mode is GET: look up tokens without modifying the lexicon. */
      if (GRN_TEXT_LEN(mode_name) == 0 || MODE_NAME_EQUAL("GET")) {
        tokenize(ctx, lexicon, string, GRN_TOKEN_GET, flags, &tokens);
        output_tokens(ctx, &tokens, lexicon, index_column);
      } else if (MODE_NAME_EQUAL("ADD")) {
        tokenize(ctx, lexicon, string, GRN_TOKEN_ADD, flags, &tokens);
        output_tokens(ctx, &tokens, lexicon, index_column);
      } else {
        GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                         "[table_tokenize] invalid mode: <%.*s>",
                         (int)GRN_TEXT_LEN(mode_name), GRN_TEXT_VALUE(mode_name));
      }
      GRN_OBJ_FIN(ctx, &tokens);
    }
#undef MODE_NAME_EQUAL

exit:
    grn_obj_unlink(ctx, lexicon);
    if (index_column) {
      grn_obj_unlink(ctx, index_column);
    }
  }

  return NULL;
}
コード例 #4
0
ファイル: proc_tokenize.c プロジェクト: cosmo0920/groonga
static grn_obj *
command_table_tokenize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  /* `table_tokenize` command handler (grn_raw_string based variant):
   * tokenizes the `string` argument against an existing lexicon table,
   * optionally reporting index-column statistics for each token.
   * Always returns NULL; failures are reported through ctx via
   * GRN_PLUGIN_ERROR. */
  grn_raw_string table_raw;
  grn_raw_string string_raw;
  grn_raw_string flags_raw;
  grn_raw_string mode_raw;
  grn_raw_string index_column_raw;

/* Fetch the command argument named `name` into `name##_raw`. */
#define GET_VALUE(name)                                         \
  name ## _raw.value =                                          \
    grn_plugin_proc_get_var_string(ctx,                         \
                                   user_data,                   \
                                   #name,                       \
                                   strlen(#name),               \
                                   &(name ## _raw.length))

  GET_VALUE(table);
  GET_VALUE(string);
  GET_VALUE(flags);
  GET_VALUE(mode);
  GET_VALUE(index_column);

#undef GET_VALUE

  /* `table` and `string` are mandatory. */
  if (table_raw.length == 0) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "[table_tokenize] table name is missing");
    return NULL;
  }

  if (string_raw.length == 0) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "[table_tokenize] string is missing");
    return NULL;
  }

  {
    unsigned int flags;
    grn_obj *lexicon;
    grn_obj *index_column = NULL;

    flags = parse_tokenize_flags(ctx, &flags_raw);
    if (ctx->rc != GRN_SUCCESS) {
      return NULL;
    }

    lexicon = grn_ctx_get(ctx,
                          table_raw.value,
                          table_raw.length);
    if (!lexicon) {
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "[table_tokenize] nonexistent lexicon: <%.*s>",
                       (int)table_raw.length,
                       table_raw.value);
      return NULL;
    }

    if (index_column_raw.length > 0) {
      /* Optional `index_column` must exist on the lexicon and be an
       * index column. */
      index_column = grn_obj_column(ctx, lexicon,
                                    index_column_raw.value,
                                    index_column_raw.length);
      if (!index_column) {
        GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                         "[table_tokenize] nonexistent index column: <%.*s>",
                         (int)index_column_raw.length,
                         index_column_raw.value);
        goto exit;
      }
      if (index_column->header.type != GRN_COLUMN_INDEX) {
        GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                         "[table_tokenize] "
                         "index column must be COLUMN_INDEX: <%.*s>",
                         (int)index_column_raw.length,
                         index_column_raw.value);
        goto exit;
      }
    }

    {
      grn_obj tokens;
      GRN_VALUE_FIX_SIZE_INIT(&tokens, GRN_OBJ_VECTOR, GRN_ID_NIL);
      /* Default mode is GET: look up tokens without modifying the lexicon. */
      if (mode_raw.length == 0 ||
          GRN_RAW_STRING_EQUAL_CSTRING(mode_raw, "GET")) {
        tokenize(ctx, lexicon, &string_raw, GRN_TOKEN_GET, flags, &tokens);
        output_tokens(ctx, &tokens, lexicon, index_column);
      } else if (GRN_RAW_STRING_EQUAL_CSTRING(mode_raw, "ADD")) {
        tokenize(ctx, lexicon, &string_raw, GRN_TOKEN_ADD, flags, &tokens);
        output_tokens(ctx, &tokens, lexicon, index_column);
      } else {
        GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                         "[table_tokenize] invalid mode: <%.*s>",
                         (int)mode_raw.length,
                         mode_raw.value);
      }
      GRN_OBJ_FIN(ctx, &tokens);
    }
    /* NOTE: removed a stray `#undef MODE_NAME_EQUAL` left over from an
     * older macro-based implementation; no such macro is defined here. */

exit:
    grn_obj_unlink(ctx, lexicon);
    if (index_column) {
      grn_obj_unlink(ctx, index_column);
    }
  }

  return NULL;
}