Ejemplo n.º 1
0
/*
 * Sets up the uvector tokenizer state for one tokenization run.
 *
 * Three values are popped from the ctx stack in this order: flags, the
 * string to tokenize, and the mode. Only the string is read afterwards;
 * flags and mode merely have to be present.
 *
 * On success the freshly allocated state is stored in user_data->ptr.
 * The function returns NULL on every path; failures are reported via ERR().
 */
static grn_obj *
uvector_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *flags;
  grn_obj *source;
  grn_obj *mode;
  grn_uvector_tokenizer *state;

  flags = grn_ctx_pop(ctx);
  if (!flags) {
    ERR(GRN_INVALID_ARGUMENT, "[tokenizer][uvector] missing argument: flags");
    return NULL;
  }

  source = grn_ctx_pop(ctx);
  if (!source) {
    ERR(GRN_INVALID_ARGUMENT, "[tokenizer][uvector] missing argument: string");
    return NULL;
  }

  mode = grn_ctx_pop(ctx);
  if (!mode) {
    ERR(GRN_INVALID_ARGUMENT, "[tokenizer][uvector] missing argument: mode");
    return NULL;
  }

  state = GRN_MALLOC(sizeof(grn_uvector_tokenizer));
  if (!state) {
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[tokenizer][uvector] "
        "memory allocation to grn_uvector_tokenizer failed");
    return NULL;
  }
  user_data->ptr = state;

  grn_tokenizer_token_init(ctx, &(state->token));
  /* The cursor walks the raw bytes of the input in grn_id-sized steps. */
  state->curr = (byte *)GRN_TEXT_VALUE(source);
  state->tail = state->curr + GRN_TEXT_LEN(source);
  state->unit = sizeof(grn_id);
  return NULL;
}
Ejemplo n.º 2
0
/*
 * Builds a tokenizer query from the values on the ctx stack.
 *
 * Pops, in order: flags, the query string, and the tokenize mode. The
 * query string is mandatory; flags and mode are applied only when present.
 * args[0] is used as the lexicon and must be non-NULL.
 *
 * Returns the new query, or NULL when an argument is missing, allocation
 * fails, or setting the raw string leaves ctx->rc in an error state.
 */
grn_tokenizer_query *
grn_tokenizer_query_open(grn_ctx *ctx, int num_args, grn_obj **args,
                         unsigned int normalize_flags)
{
  grn_obj *flags_obj;
  grn_obj *text_obj;
  grn_obj *mode_obj;
  grn_tokenizer_query *query;

  GRN_API_ENTER;

  /* All three values are popped before any validation takes place. */
  flags_obj = grn_ctx_pop(ctx);
  text_obj = grn_ctx_pop(ctx);
  mode_obj = grn_ctx_pop(ctx);

  if (!text_obj) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "missing argument");
    GRN_API_RETURN(NULL);
  }

  if (num_args < 1 || !args || !args[0]) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "invalid NULL pointer");
    GRN_API_RETURN(NULL);
  }

  query = GRN_PLUGIN_MALLOC(ctx, sizeof(grn_tokenizer_query));
  if (query == NULL) {
    GRN_API_RETURN(NULL);
  }

  grn_tokenizer_query_init(ctx, query);
  grn_tokenizer_query_set_raw_string(ctx,
                                     query,
                                     GRN_TEXT_VALUE(text_obj),
                                     GRN_TEXT_LEN(text_obj));
  if (ctx->rc != GRN_SUCCESS) {
    GRN_PLUGIN_FREE(ctx, query);
    GRN_API_RETURN(NULL);
  }

  if (flags_obj != NULL) {
    grn_tokenizer_query_set_flags(ctx, query, GRN_UINT32_VALUE(flags_obj));
  }
  if (mode_obj != NULL) {
    grn_tokenizer_query_set_mode(ctx, query, GRN_UINT32_VALUE(mode_obj));
  }
  grn_tokenizer_query_set_normalize_flags(ctx, query, normalize_flags);
  grn_tokenizer_query_set_lexicon(ctx, query, args[0]);

  grn_tokenizer_query_ensure_have_tokenized_delimiter(ctx, query);

  GRN_API_RETURN(query);
}
Ejemplo n.º 3
0
/*
 * Prepares a delimiter-based tokenizer for the string popped from the ctx
 * stack.
 *
 * table          supplies the flags and encoding used for normalization.
 * user_data      receives the tokenizer state in ->ptr on success only.
 * delimiter      bytes of the delimiter; the pointer is stored, not copied.
 * delimiter_len  length of delimiter in bytes.
 *
 * Always returns NULL. On failure user_data->ptr is left untouched, so it
 * never refers to a tokenizer that has already been freed.
 */
static grn_obj *
delimited_init(grn_ctx *ctx, grn_obj *table, grn_user_data *user_data,
               uint8_t *delimiter, uint32_t delimiter_len)
{
  grn_obj *str;
  int nflags = 0;
  grn_delimited_tokenizer *token;
  grn_obj_flags table_flags;
  if (!(str = grn_ctx_pop(ctx))) {
    ERR(GRN_INVALID_ARGUMENT, "missing argument");
    return NULL;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_delimited_tokenizer)))) { return NULL; }
  token->delimiter = delimiter;
  token->delimiter_len = delimiter_len;
  token->pos = 0;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  /* Normalize the input only when the table itself normalizes its keys. */
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_FREE(token);
    ERR(GRN_TOKENIZER_ERROR, "grn_str_open failed at grn_token_open");
    return NULL;
  }
  token->next = (unsigned char *)token->nstr->norm;
  token->end = token->next + token->nstr->norm_blen;
  token->len = token->nstr->length;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  /* Publish the state last: earlier this was done before grn_str_open_(),
     which left a dangling pointer behind when that call failed. */
  user_data->ptr = token;
  return NULL;
}
Ejemplo n.º 4
0
/*
 * Prepares an n-gram tokenizer for the string popped from the ctx stack.
 *
 * table       supplies the flags and encoding used for normalization.
 * user_data   receives the tokenizer state in ->ptr on success only.
 * ngram_unit  value stored in the tokenizer's ngram_unit field.
 *
 * Returns GRN_SUCCESS, GRN_INVALID_ARGUMENT when no string is on the stack,
 * ctx->rc when allocation fails, or GRN_TOKENIZER_ERROR when the normalized
 * string cannot be opened. On every failure the partially built tokenizer is
 * released and user_data->ptr is left untouched.
 */
static grn_rc
ngram_init(grn_ctx *ctx, grn_obj *table, grn_proc_data *user_data, uint8_t ngram_unit)
{
  grn_obj *str;
  int nflags = GRN_STR_REMOVEBLANK|GRN_STR_WITH_CTYPES;
  grn_ngram_tokenizer *token;
  grn_obj_flags table_flags;
  if (!(str = grn_ctx_pop(ctx))) { return GRN_INVALID_ARGUMENT; }
  if (!(token = GRN_MALLOC(sizeof(grn_ngram_tokenizer)))) { return ctx->rc; }
  token->uni_alpha = 1;
  token->uni_digit = 1;
  token->uni_symbol = 1;
  token->ngram_unit = ngram_unit;
  token->overlap = 0;
  token->pos = 0;
  token->skip = 0;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  /* Normalize the input only when the table itself normalizes its keys. */
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_str_open failed at grn_token_open");
    /* Do not hand a half-initialized tokenizer to the caller. */
    GRN_FREE(token);
    return GRN_TOKENIZER_ERROR;
  }
  token->next = (unsigned char *)token->nstr->norm;
  token->end = token->next + token->nstr->norm_blen;
  token->ctypes = token->nstr->ctypes;
  token->len = token->nstr->length;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  /* Publish the state only once it is fully initialized. */
  user_data->ptr = token;
  return GRN_SUCCESS;
}
Ejemplo n.º 5
0
/*
 * call-seq:
 *   context.pop -> value
 *
 * Pops a value from the stack held by the context. That stack
 * holds the results of running Groonga::Expression#execute.
 */
static VALUE
rb_grn_context_pop (VALUE self)
{
    grn_ctx *context = SELF(self);
    grn_obj *popped = grn_ctx_pop(context);

    return GRNOBJ2RVAL(Qnil, context, popped, self);
}
Ejemplo n.º 6
0
/*
 * Sets up the uvector tokenizer state for the string popped from the ctx
 * stack and stores it in user_data->ptr.
 *
 * Returns GRN_INVALID_ARGUMENT when nothing can be popped, ctx->rc when the
 * allocation fails, and GRN_SUCCESS otherwise. The table argument is not
 * used by this function.
 */
static grn_rc
uvector_init(grn_ctx *ctx, grn_obj *table, grn_proc_data *user_data)
{
  grn_obj *source = grn_ctx_pop(ctx);
  grn_uvector_tokenizer_info *info;

  if (!source) {
    return GRN_INVALID_ARGUMENT;
  }

  info = GRN_MALLOC(sizeof(grn_uvector_tokenizer_info));
  if (!info) {
    return ctx->rc;
  }
  user_data->ptr = info;

  /* The cursor walks the raw bytes of the input in grn_id-sized steps. */
  info->curr = GRN_TEXT_VALUE(source);
  info->tail = info->curr + GRN_TEXT_LEN(source);
  info->unit = sizeof(grn_id);

  GRN_TEXT_INIT(&info->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&info->stat_, 0);
  return GRN_SUCCESS;
}
Ejemplo n.º 7
0
/*
 * Sets up the uvector tokenizer state for the string popped from the ctx
 * stack and stores it in user_data->ptr.
 *
 * Always returns NULL. A missing string is reported through ERR(); an
 * allocation failure returns without raising an error here. nargs and args
 * are not used by this function.
 */
static grn_obj *
uvector_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *source = grn_ctx_pop(ctx);
  grn_uvector_tokenizer_info *info;

  if (!source) {
    ERR(GRN_INVALID_ARGUMENT, "missing argument");
    return NULL;
  }

  info = GRN_MALLOC(sizeof(grn_uvector_tokenizer_info));
  if (!info) {
    return NULL;
  }
  user_data->ptr = info;

  /* The cursor walks the raw bytes of the input in grn_id-sized steps. */
  info->curr = GRN_TEXT_VALUE(source);
  info->tail = info->curr + GRN_TEXT_LEN(source);
  info->unit = sizeof(grn_id);

  GRN_TEXT_INIT(&info->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&info->stat_, 0);
  return NULL;
}
Ejemplo n.º 8
0
/*
 * Prepares an n-gram tokenizer for the string popped from the ctx stack.
 *
 * table         supplies the flags and encoding used for normalization.
 * user_data     receives the tokenizer state in ->ptr on success only.
 * ngram_unit, uni_alpha, uni_digit, uni_symbol, ignore_blank
 *               are copied verbatim into the tokenizer state.
 *
 * Always returns NULL. On failure user_data->ptr is left untouched, so it
 * never refers to a tokenizer that has already been freed.
 */
static grn_obj *
ngram_init(grn_ctx *ctx, grn_obj *table, grn_user_data *user_data, uint8_t ngram_unit,
           uint8_t uni_alpha, uint8_t uni_digit, uint8_t uni_symbol, uint8_t ignore_blank)
{
  grn_obj *str;
  int nflags = GRN_STR_REMOVEBLANK|GRN_STR_WITH_CTYPES;
  grn_ngram_tokenizer *token;
  grn_obj_flags table_flags;
  if (!(str = grn_ctx_pop(ctx))) {
    ERR(GRN_INVALID_ARGUMENT, "missing argument");
    return NULL;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_ngram_tokenizer)))) { return NULL; }
  token->uni_alpha = uni_alpha;
  token->uni_digit = uni_digit;
  token->uni_symbol = uni_symbol;
  token->ngram_unit = ngram_unit;
  token->ignore_blank = ignore_blank;
  token->overlap = 0;
  token->pos = 0;
  token->skip = 0;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  /* Normalize the input only when the table itself normalizes its keys. */
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_FREE(token);
    ERR(GRN_TOKENIZER_ERROR, "grn_str_open failed at grn_token_open");
    return NULL;
  }
  token->next = (unsigned char *)token->nstr->norm;
  token->end = token->next + token->nstr->norm_blen;
  token->ctypes = token->nstr->ctypes;
  token->len = token->nstr->length;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  /* Publish the state last: earlier this was done before grn_str_open_(),
     which left a dangling pointer behind when that call failed. */
  user_data->ptr = token;
  return NULL;
}
Ejemplo n.º 9
0
/*
 * Runs a command object with the given input and returns ctx->rc.
 *
 * When the command provides a callbacks.command.run callback it is invoked
 * directly. Otherwise the command is executed through grn_proc_call(), and
 * one value is popped if that call left the ctx stack deeper than before.
 */
grn_rc
grn_command_run(grn_ctx *ctx,
                grn_obj *command,
                grn_command_input *input)
{
  grn_proc *proc = (grn_proc *)command;

  GRN_API_ENTER;

  if (!proc->callbacks.command.run) {
    /* TODO: REMOVE ME. Legacy path kept for backward compatibility. */
    uint32_t depth_before = ctx->impl->stack_curr;

    grn_proc_call(ctx, command, 0, command);
    if (depth_before < ctx->impl->stack_curr) {
      grn_ctx_pop(ctx);
    }
  } else {
    proc->callbacks.command.run(ctx, command, input, proc->user_data);
  }

  GRN_API_RETURN(ctx->rc);
}
Ejemplo n.º 10
0
/*
 * Advances the token stream by one token and returns its ID in
 * token->table, or GRN_ID_NIL when no ID was obtained.
 *
 * With a tokenizer attached, its PROC_NEXT function is called and the
 * resulting status and text are popped from the ctx stack. Without one, the
 * whole original string (token->orig) is treated as a single token.
 *
 * When token->add is set the token is added to the table; otherwise it is
 * only looked up. token->status, token->pos, token->curr, token->curr_size
 * and token->force_prefix are updated as a side effect.
 */
grn_id
grn_token_next(grn_ctx *ctx, grn_token *token)
{
  int status;
  grn_id tid = GRN_ID_NIL;
  grn_obj *table = token->table;
  grn_obj *tokenizer = token->tokenizer;
  /* The loop body runs more than once only when a token is skipped via
     `continue`; every other path ends at the `break` at the bottom. */
  while (token->status != grn_token_done) {
    if (tokenizer) {
      grn_obj *curr_, *stat_;
      ((grn_proc *)tokenizer)->funcs[PROC_NEXT](ctx, 1, &table, &token->pctx.user_data);
      /* Status is popped first, then the token text. */
      stat_ = grn_ctx_pop(ctx);
      curr_ = grn_ctx_pop(ctx);
      token->curr = GRN_TEXT_VALUE(curr_);
      token->curr_size = GRN_TEXT_LEN(curr_);
      status = GRN_UINT32_VALUE(stat_);
      /* Done on the last token, or, in lookup mode, as soon as the
         tokenizer reports that it reached the end of the input. */
      token->status = ((status & GRN_TOKEN_LAST) ||
                       (!token->add && (status & GRN_TOKEN_REACH_END)))
        ? grn_token_done : grn_token_doing;
      token->force_prefix = 0;
      if (status & GRN_TOKEN_UNMATURED) {
        if (status & GRN_TOKEN_OVERLAP) {
          /* In lookup mode an unmatured overlapping token is skipped, but
             its position is still counted. */
          if (!token->add) { token->pos++; continue; }
        } else {
          if (status & GRN_TOKEN_LAST) { token->force_prefix = 1; }
        }
      }
    } else {
      /* No tokenizer: the entire original string is the one and only token. */
      token->curr = token->orig;
      token->curr_size = token->orig_blen;
      token->status = grn_token_done;
    }
    if (token->add) {
      /* Add mode: register the token in the table, holding the table's io
         lock around the insertion. A failed lock yields GRN_ID_NIL. */
      switch (table->header.type) {
      case GRN_TABLE_PAT_KEY :
        if (grn_io_lock(ctx, ((grn_pat *)table)->io, 10000000)) {
          tid = GRN_ID_NIL;
        } else {
          tid = grn_pat_add(ctx, (grn_pat *)table, token->curr, token->curr_size,
                            NULL, NULL);
          grn_io_unlock(((grn_pat *)table)->io);
        }
        break;
      case GRN_TABLE_HASH_KEY :
        if (grn_io_lock(ctx, ((grn_hash *)table)->io, 10000000)) {
          tid = GRN_ID_NIL;
        } else {
          tid = grn_hash_add(ctx, (grn_hash *)table, token->curr, token->curr_size,
                             NULL, NULL);
          grn_io_unlock(((grn_hash *)table)->io);
        }
        break;
      case GRN_TABLE_NO_KEY :
        /* For a keyless table the token bytes are themselves the ID,
           provided the token is exactly sizeof(grn_id) bytes long. */
        if (token->curr_size == sizeof(grn_id)) {
          tid = *((grn_id *)token->curr);
        } else {
          tid = GRN_ID_NIL;
        }
        break;
      }
    } else {
      /* Lookup mode: resolve the token without modifying the table. */
      switch (table->header.type) {
      case GRN_TABLE_PAT_KEY :
        tid = grn_pat_get(ctx, (grn_pat *)table, token->curr, token->curr_size, NULL);
        break;
      case GRN_TABLE_HASH_KEY :
        tid = grn_hash_get(ctx, (grn_hash *)table, token->curr, token->curr_size, NULL);
        break;
      case GRN_TABLE_NO_KEY :
        if (token->curr_size == sizeof(grn_id)) {
          tid = *((grn_id *)token->curr);
        } else {
          tid = GRN_ID_NIL;
        }
        break;
      }
    }
    /* A missing ID before the end of the stream is recorded as not-found. */
    if (tid == GRN_ID_NIL && token->status != grn_token_done) {
      token->status = grn_token_not_found;
    }
    token->pos++;
    break;
  }
  return tid;
}
Ejemplo n.º 11
0
/*
  This function is called for a full text search query or a document to be
  indexed. This means that both short/long strings are given.
  The return value of this function is ignored. When an error occurs in this
  function, `ctx->rc' is overwritten with an error code (not GRN_SUCCESS).

  The string to tokenize is popped from the ctx stack and args[0] is used as
  the table. On success the tokenizer state is stored in user_data->ptr; on
  every failure path shown here user_data->ptr is not written.
 */
static grn_obj *
mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *str;
  int nflags = 0;
  char *buf, *p;
  const char *s;
  grn_obj *table = args[0];
  grn_obj_flags table_flags;
  grn_encoding table_encoding;
  grn_mecab_tokenizer *token;
  unsigned int bufsize, len;
  if (!(str = grn_ctx_pop(ctx))) {
    ERR(GRN_INVALID_ARGUMENT, "missing argument");
    return NULL;
  }
  /* Create the shared MeCab instance on first use. The pointer is tested
     once outside and once inside the critical section. */
  if (!sole_mecab) {
    CRITICAL_SECTION_ENTER(sole_mecab_lock);
    if (!sole_mecab) {
      sole_mecab = mecab_new2("-Owakati");
      if (!sole_mecab) {
        ERR(GRN_TOKENIZER_ERROR, "mecab_new2 failed on grn_mecab_init: %s",
            mecab_strerror(NULL));
      } else {
        sole_mecab_encoding = get_mecab_encoding(sole_mecab);
      }
    }
    CRITICAL_SECTION_LEAVE(sole_mecab_lock);
  }
  if (!sole_mecab) {
    return NULL;
  }
  /* The table's encoding must match the one reported for the MeCab
     instance; otherwise tokenization is refused. */
  grn_table_get_info(ctx, table, &table_flags, &table_encoding, NULL);
  if (table_encoding != sole_mecab_encoding) {
    ERR(GRN_TOKENIZER_ERROR,
        "MeCab dictionary charset (%s) does not match the context encoding: <%s>",
        grn_enctostr(sole_mecab_encoding), grn_enctostr(table_encoding));
    return NULL;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_mecab_tokenizer)))) { return NULL; }
  token->mecab = sole_mecab;
  token->encoding = table_encoding;
  /* Normalize the input only when the table itself normalizes its keys. */
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_FREE(token);
    ERR(GRN_TOKENIZER_ERROR, "grn_str_open failed at grn_token_open");
    return NULL;
  }
  len = token->nstr->norm_blen;
  /* The MeCab result is copied into a private buffer while the lock is
     still held, before anything else can use the shared instance. */
  CRITICAL_SECTION_ENTER(sole_mecab_lock);
  s = mecab_sparse_tostr2(token->mecab, token->nstr->norm, len);
  if (!s) {
    ERR(GRN_TOKENIZER_ERROR, "mecab_sparse_tostr failed len=%d err=%s",
        len, mecab_strerror(token->mecab));
  } else {
    bufsize = strlen(s) + 1;
    if (!(buf = GRN_MALLOC(bufsize))) {
      GRN_LOG(ctx, GRN_LOG_ALERT, "buffer allocation on mecab_init failed !");
    } else {
      memcpy(buf, s, bufsize);
    }
  }
  CRITICAL_SECTION_LEAVE(sole_mecab_lock);
  /* buf and bufsize are assigned only when s is non-NULL; the `!s ||`
     short-circuit keeps buf from being read otherwise. */
  if (!s || !buf) {
    grn_str_close(ctx, token->nstr);
    GRN_FREE(token);
    return NULL;
  }
  /* A certain version of mecab returns trailing lf or spaces. */
  /* Start at the last character before the terminator and overwrite
     trailing whitespace with NUL, walking backwards. */
  for (p = buf + bufsize - 2;
       buf <= p && isspace(*(unsigned char *)p);
       p--) { *p = '\0'; }
  user_data->ptr = token;
  token->buf = buf;
  token->next = buf;
  /* p now points at the last kept character (or just before buf). */
  token->end = p + 1;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  return NULL;
}
Ejemplo n.º 12
0
/*
 * Builds a tokenizer query from the values on the ctx stack.
 *
 * Pops, in order: flags, the query string, and the tokenize mode. The query
 * string is mandatory. Missing flags default to 0 and a missing mode
 * defaults to GRN_TOKENIZE_ADD. args[0] is used as the table whose flags,
 * encoding and normalizer drive normalization of the query string.
 *
 * The returned query owns a NUL-terminated copy of the raw string
 * (query_buf) and the normalized string object (normalized_query).
 * Returns NULL when an argument is missing, an allocation fails, or the
 * normalized string cannot be opened.
 */
grn_tokenizer_query *
grn_tokenizer_query_open(grn_ctx *ctx, int num_args, grn_obj **args,
                         unsigned int normalize_flags)
{
  grn_obj *flags_obj = grn_ctx_pop(ctx);
  grn_obj *text_obj = grn_ctx_pop(ctx);
  grn_obj *mode_obj = grn_ctx_pop(ctx);
  grn_tokenizer_query *query;
  grn_obj *lexicon;
  grn_obj *normalizer = NULL;
  grn_obj *normalized;
  grn_obj_flags lexicon_flags;
  grn_encoding lexicon_encoding;
  unsigned int text_length;
  char *text_copy;

  if (!text_obj) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "missing argument");
    return NULL;
  }

  if (num_args < 1 || !args || !args[0]) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "invalid NULL pointer");
    return NULL;
  }

  query = GRN_PLUGIN_MALLOC(ctx, sizeof(grn_tokenizer_query));
  if (!query) {
    return NULL;
  }
  query->normalized_query = NULL;
  query->query_buf = NULL;

  /* Start from the defaults and override them with whatever was popped. */
  query->flags = 0;
  if (flags_obj) {
    query->flags = GRN_UINT32_VALUE(flags_obj);
  }
  query->tokenize_mode = GRN_TOKENIZE_ADD;
  if (mode_obj) {
    query->tokenize_mode = GRN_UINT32_VALUE(mode_obj);
  }
  query->token_mode = query->tokenize_mode;

  lexicon = args[0];
  text_length = GRN_TEXT_LEN(text_obj);
  /* One extra byte for the terminating NUL of the private copy. */
  text_copy = (char *)GRN_PLUGIN_MALLOC(ctx, text_length + 1);
  if (!text_copy) {
    GRN_PLUGIN_FREE(ctx, query);
    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
                     "[tokenizer] failed to duplicate query");
    return NULL;
  }

  grn_table_get_info(ctx, lexicon, &lexicon_flags, &lexicon_encoding, NULL,
                     &normalizer, NULL);
  /* The key-normalize flag overrides whatever normalizer the table reported. */
  if (lexicon_flags & GRN_OBJ_KEY_NORMALIZE) {
    normalizer = GRN_NORMALIZER_AUTO;
  }

  normalized = grn_string_open_(ctx,
                                GRN_TEXT_VALUE(text_obj),
                                GRN_TEXT_LEN(text_obj),
                                normalizer,
                                normalize_flags,
                                lexicon_encoding);
  if (!normalized) {
    GRN_PLUGIN_FREE(ctx, text_copy);
    GRN_PLUGIN_FREE(ctx, query);
    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
                     "[tokenizer] failed to open normalized string");
    return NULL;
  }
  query->normalized_query = normalized;

  grn_memcpy(text_copy, GRN_TEXT_VALUE(text_obj), text_length);
  text_copy[text_length] = '\0';
  query->query_buf = text_copy;
  query->ptr = text_copy;
  query->length = text_length;
  query->encoding = lexicon_encoding;

  /* The delimiter scan is performed only when the flag requests it. */
  query->have_tokenized_delimiter = GRN_FALSE;
  if (query->flags & GRN_TOKEN_CURSOR_ENABLE_TOKENIZED_DELIMITER) {
    const char *normalized_text;
    unsigned int normalized_text_length;

    grn_string_get_normalized(ctx,
                              query->normalized_query,
                              &normalized_text,
                              &normalized_text_length,
                              NULL);
    query->have_tokenized_delimiter =
      grn_tokenizer_have_tokenized_delimiter(ctx,
                                             normalized_text,
                                             normalized_text_length,
                                             query->encoding);
  }

  return query;
}
Ejemplo n.º 13
0
/*
 * Checks that an expression stored in a persistent database still
 * evaluates after the database has been closed and reopened.
 *
 * Two keyless persistent tables t1 and t2 are created, with column c1 on t1
 * referring to t2 and column c2 on t2 referring to t1. NRECORDS record
 * pairs are linked to each other through those columns. The expression
 * "test" applies c1, c2, c1 in turn to its variable "foo". After reopening
 * the database, the expression is run for every record of t1 and the popped
 * result is compared against the record ID; mismatches are counted in nerr,
 * which must be 0.
 */
void
test_persistent_expr(void)
{
    int i;
    grn_obj *t1, *t2, *c1, *c2, r1, r2, buf;
    t1 = grn_table_create(context, "t1", 2, NULL,
                          GRN_OBJ_TABLE_NO_KEY|GRN_OBJ_PERSISTENT, NULL, NULL);
    cut_assert_not_null(t1);
    t2 = grn_table_create(context, "t2", 2, NULL,
                          GRN_OBJ_TABLE_NO_KEY|GRN_OBJ_PERSISTENT, NULL, NULL);
    cut_assert_not_null(t2);
    c1 = grn_column_create(context, t1, "c1", 2, NULL,
                           GRN_OBJ_PERSISTENT, t2);
    cut_assert_not_null(c1);
    c2 = grn_column_create(context, t2, "c2", 2, NULL,
                           GRN_OBJ_PERSISTENT, t1);
    cut_assert_not_null(c2);
    GRN_TEXT_INIT(&buf, 0);
    GRN_RECORD_INIT(&r1, 0, grn_obj_id(context, t1));
    GRN_RECORD_INIT(&r2, 0, grn_obj_id(context, t2));
    /* Link the i-th record of t1 and the i-th record of t2 to each other. */
    for (i = 0; i < NRECORDS; i++) {
        grn_id i1, i2;
        i1 = grn_table_add(context, t1, NULL, 0, NULL);
        i2 = grn_table_add(context, t2, NULL, 0, NULL);
        GRN_RECORD_SET(context, &r1, i1);
        GRN_RECORD_SET(context, &r2, i2);
        grn_obj_set_value(context, c1, i1, &r2, GRN_OBJ_SET);
        grn_obj_set_value(context, c2, i2, &r1, GRN_OBJ_SET);
    }
    /* Build the expression "test": push variable foo, then apply
       GET_VALUE with "c1", "c2" and "c1" in sequence. */
    {
        grn_obj *v;
        expr = grn_expr_create(context, "test", 4);
        cut_assert_not_null(expr);
        v = grn_expr_add_var(context, expr, "foo", 3);
        GRN_RECORD_INIT(v, 0, grn_obj_id(context, t1));
        grn_expr_append_obj(context, expr, v, GRN_OP_PUSH, 1);

        GRN_TEXT_SETS(context, &buf, "c1");
        grn_expr_append_const(context, expr, &buf, GRN_OP_PUSH, 1);
        grn_expr_append_op(context, expr, GRN_OP_GET_VALUE, 2);
        GRN_TEXT_SETS(context, &buf, "c2");
        grn_expr_append_const(context, expr, &buf, GRN_OP_PUSH, 1);
        grn_expr_append_op(context, expr, GRN_OP_GET_VALUE, 2);
        GRN_TEXT_SETS(context, &buf, "c1");
        grn_expr_append_const(context, expr, &buf, GRN_OP_PUSH, 1);

        /*
        GRN_TEXT_SETS(context, &buf, "c1.c2.c1");
        grn_expr_append_const(context, expr, &buf);
        */

        grn_expr_append_op(context, expr, GRN_OP_GET_VALUE, 2);
        grn_expr_compile(context, expr);
        grn_test_assert(grn_obj_close(context, expr));
        expr = NULL;
    }
    grn_test_assert(grn_obj_close(context, &buf));

    /* Reopen the database so that everything below is fetched again by name. */
    grn_db_close(context, database);
    database = grn_db_open(context, path);

    {
        grn_id id;
        uint64_t et;
        int nerr = 0;
        grn_obj *r, *v;
        grn_table_cursor *tc;
        struct timeval tvb, tve;
        expr = get_object("test");
        v = grn_expr_get_var(context, expr, "foo", 3);
        t1 = get_object("t1");
        tc = grn_table_cursor_open(context, t1, NULL, 0, NULL, 0, 0, -1, 0);
        cut_assert_not_null(tc);
        gettimeofday(&tvb, NULL);
        /* Run the expression once per record of t1 and count every result
           that differs from the record ID it was run for. */
        while ((id = grn_table_cursor_next(context, tc))) {
            GRN_RECORD_SET(context, v, id);
            grn_expr_exec(context, expr, 0);
            r = grn_ctx_pop(context);
            if (GRN_RECORD_VALUE(r) != id) {
                nerr++;
            }
        }
        gettimeofday(&tve, NULL);
        /* Elapsed time in microseconds; only the disabled printf below uses it. */
        et = (tve.tv_sec - tvb.tv_sec) * 1000000 + (tve.tv_usec - tvb.tv_usec);
        // printf("et=%zu\n", et);
        cut_assert_equal_uint(0, nerr);
        grn_test_assert(grn_table_cursor_close(context, tc));
    }
    grn_test_assert(grn_obj_close(context, &r1));
    grn_test_assert(grn_obj_close(context, &r2));
}
Ejemplo n.º 14
0
/*
 * Advances the token stream by one usable token and returns its ID in
 * token->table, or GRN_ID_NIL when no ID was obtained.
 *
 * With a tokenizer attached, its PROC_NEXT function is called and the
 * resulting status and text are popped from the ctx stack. Empty tokens and
 * tokens longer than GRN_TABLE_MAX_KEY_SIZE are logged and skipped. Without
 * a tokenizer only the status is set to done; token->curr and
 * token->curr_size are used as they already are.
 *
 * In GRN_TOKEN_ADD mode the token is added to the table; in any other mode
 * it is only looked up. token->status, token->pos, token->curr,
 * token->curr_size and token->force_prefix are updated as a side effect.
 */
grn_id
grn_token_next(grn_ctx *ctx, grn_token *token)
{
  int status;
  grn_id tid = GRN_ID_NIL;
  grn_obj *table = token->table;
  grn_obj *tokenizer = token->tokenizer;
  /* The loop body runs more than once only when a token is skipped via
     `continue`; every other path ends at the `break` at the bottom. */
  while (token->status != GRN_TOKEN_DONE) {
    if (tokenizer) {
      grn_obj *curr_, *stat_;
      ((grn_proc *)tokenizer)->funcs[PROC_NEXT](ctx, 1, &table, &token->pctx.user_data);
      /* Status is popped first, then the token text. */
      stat_ = grn_ctx_pop(ctx);
      curr_ = grn_ctx_pop(ctx);
      token->curr = (const unsigned char *)GRN_TEXT_VALUE(curr_);
      token->curr_size = GRN_TEXT_LEN(curr_);
      status = GRN_UINT32_VALUE(stat_);
      /* Done on the last token, or, in GET mode, as soon as the tokenizer
         reports that it reached the end of the input. */
      token->status = ((status & GRN_TOKENIZER_TOKEN_LAST) ||
                       (token->mode == GRN_TOKEN_GET &&
                        (status & GRN_TOKENIZER_TOKEN_REACH_END)))
        ? GRN_TOKEN_DONE : GRN_TOKEN_DOING;
      token->force_prefix = 0;
      /* Empty tokens are logged and skipped without advancing pos. */
      if (token->curr_size == 0) {
        char tokenizer_name[GRN_TABLE_MAX_KEY_SIZE];
        int tokenizer_name_length;
        tokenizer_name_length =
          grn_obj_name(ctx, token->tokenizer,
                       tokenizer_name, GRN_TABLE_MAX_KEY_SIZE);
        GRN_LOG(ctx, GRN_WARN,
                "[token_next] ignore an empty token: <%.*s>: <%.*s>",
                tokenizer_name_length, tokenizer_name,
                token->orig_blen, token->orig);
        continue;
      }
      /* Over-long tokens are logged and skipped without advancing pos. */
      if (token->curr_size > GRN_TABLE_MAX_KEY_SIZE) {
        GRN_LOG(ctx, GRN_WARN,
                "[token_next] ignore too long token. "
                "Token must be less than or equal to %d: <%d>(<%.*s>)",
                GRN_TABLE_MAX_KEY_SIZE,
                token->curr_size,
                token->curr_size, token->curr);
        continue;
      }
      if (status & GRN_TOKENIZER_TOKEN_UNMATURED) {
        if (status & GRN_TOKENIZER_TOKEN_OVERLAP) {
          /* In GET mode an unmatured overlapping token is skipped, but its
             position is still counted. */
          if (token->mode == GRN_TOKEN_GET) { token->pos++; continue; }
        } else {
          if (status & GRN_TOKENIZER_TOKEN_LAST) { token->force_prefix = 1; }
        }
      }
    } else {
      token->status = GRN_TOKEN_DONE;
    }
    if (token->mode == GRN_TOKEN_ADD) {
      /* ADD mode: register the token in the table, holding the table's io
         lock around the insertion. A failed lock yields GRN_ID_NIL. */
      switch (table->header.type) {
      case GRN_TABLE_PAT_KEY :
        if (grn_io_lock(ctx, ((grn_pat *)table)->io, grn_lock_timeout)) {
          tid = GRN_ID_NIL;
        } else {
          tid = grn_pat_add(ctx, (grn_pat *)table, token->curr, token->curr_size,
                            NULL, NULL);
          grn_io_unlock(((grn_pat *)table)->io);
        }
        break;
      case GRN_TABLE_DAT_KEY :
        if (grn_io_lock(ctx, ((grn_dat *)table)->io, grn_lock_timeout)) {
          tid = GRN_ID_NIL;
        } else {
          tid = grn_dat_add(ctx, (grn_dat *)table, token->curr, token->curr_size,
                            NULL, NULL);
          grn_io_unlock(((grn_dat *)table)->io);
        }
        break;
      case GRN_TABLE_HASH_KEY :
        if (grn_io_lock(ctx, ((grn_hash *)table)->io, grn_lock_timeout)) {
          tid = GRN_ID_NIL;
        } else {
          tid = grn_hash_add(ctx, (grn_hash *)table, token->curr, token->curr_size,
                             NULL, NULL);
          grn_io_unlock(((grn_hash *)table)->io);
        }
        break;
      case GRN_TABLE_NO_KEY :
        /* For a keyless table the token bytes are themselves the ID,
           provided the token is exactly sizeof(grn_id) bytes long. */
        if (token->curr_size == sizeof(grn_id)) {
          tid = *((grn_id *)token->curr);
        } else {
          tid = GRN_ID_NIL;
        }
        break;
      }
    } else {
      /* Any other mode: resolve the token without modifying the table. */
      switch (table->header.type) {
      case GRN_TABLE_PAT_KEY :
        tid = grn_pat_get(ctx, (grn_pat *)table, token->curr, token->curr_size, NULL);
        break;
      case GRN_TABLE_DAT_KEY :
        tid = grn_dat_get(ctx, (grn_dat *)table, token->curr, token->curr_size, NULL);
        break;
      case GRN_TABLE_HASH_KEY :
        tid = grn_hash_get(ctx, (grn_hash *)table, token->curr, token->curr_size, NULL);
        break;
      case GRN_TABLE_NO_KEY :
        if (token->curr_size == sizeof(grn_id)) {
          tid = *((grn_id *)token->curr);
        } else {
          tid = GRN_ID_NIL;
        }
        break;
      }
    }
    /* A missing ID before the end of the stream is recorded as not-found. */
    if (tid == GRN_ID_NIL && token->status != GRN_TOKEN_DONE) {
      token->status = GRN_TOKEN_NOT_FOUND;
    }
    token->pos++;
    break;
  }
  return tid;
}
Ejemplo n.º 15
0
/*
 * Rewrites tag IDs in the new value through the synonym column.
 *
 * Pops, in order: flags, the new value, the old value and an ID from the
 * ctx stack. Nothing is done when the new value is empty or flags is 0.
 * The table is looked up from the old value's domain and must be a table;
 * its SYNONYM_COLUMN_NAME column maps a tag ID to its replacement ID, where
 * a stored value of 0 means "keep the ID as is".
 *
 * - New value with a string domain: the text is tokenized against the table
 *   and the new value is re-initialized as a record vector of the mapped IDs.
 * - New value of type GRN_UVECTOR: each element is mapped in the same way.
 * - Otherwise: the single record ID held by the new value is mapped in place.
 *
 * Always returns NULL. nargs, args and user_data are unused.
 */
static grn_obj *
command_tag_synonym(grn_ctx *ctx, GNUC_UNUSED int nargs, GNUC_UNUSED grn_obj **args,
                    GNUC_UNUSED grn_user_data *user_data)
{
  grn_obj *flags = grn_ctx_pop(ctx);
  grn_obj *newvalue = grn_ctx_pop(ctx);
  grn_obj *oldvalue = grn_ctx_pop(ctx);
  /* The ID is not needed but must still be popped to keep the stack balanced. */
  GNUC_UNUSED grn_obj *id = grn_ctx_pop(ctx);
  grn_obj record;
  grn_obj *domain;
  grn_obj *table;
  grn_obj *column;
  int from_text;
  int i,n;

  if (GRN_BULK_VSIZE(newvalue) == 0 || GRN_INT32_VALUE(flags) == 0) {
    return NULL;
  }

  table = grn_ctx_at(ctx, oldvalue->header.domain);
  if (table && !is_table(table)) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING,
                   "[tag-synonym] "
                   "hooked column must be reference type");
    return NULL;
  }

  column = grn_obj_column(ctx,
                          table,
                          SYNONYM_COLUMN_NAME,
                          SYNONYM_COLUMN_NAME_LEN);
  if (!column) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING,
                   "[tag-synonym] "
                   "couldn't open synonym column");
    return NULL;
  }

  domain = grn_ctx_at(ctx, newvalue->header.domain);
  /* Evaluate the string check once, and never with a NULL domain. */
  from_text = (domain && is_string(domain));
  if (from_text) {
    GRN_RECORD_INIT(&record, GRN_OBJ_VECTOR, oldvalue->header.domain);
    grn_table_tokenize(ctx, table, GRN_TEXT_VALUE(newvalue), GRN_TEXT_LEN(newvalue), &record, GRN_TRUE);
  } else if (newvalue->header.type == GRN_UVECTOR) {
    /* Shallow copy: record takes over the element storage of newvalue. */
    record = *newvalue;
  }

  if (from_text || newvalue->header.type == GRN_UVECTOR) {
    grn_obj value;

    GRN_RECORD_INIT(newvalue, GRN_OBJ_VECTOR, oldvalue->header.domain);
    GRN_UINT32_INIT(&value, 0);
    n = grn_vector_size(ctx, &record);
    for (i = 0; i < n; i++) {
      grn_id tid;
      tid = grn_uvector_get_element(ctx, &record, i, NULL);
      GRN_BULK_REWIND(&value);
      grn_obj_get_value(ctx, column, tid, &value);
      if (GRN_UINT32_VALUE(&value)) {
        GRN_PLUGIN_LOG(ctx, GRN_LOG_INFO,
                       "[tag-synonym] "
                       "changed: tid %d -> %d", tid, GRN_UINT32_VALUE(&value));
        tid = GRN_UINT32_VALUE(&value);
      }
      grn_uvector_add_element(ctx, newvalue, tid, 0);
    }
    grn_obj_unlink(ctx, &value);
    if (from_text) {
      /* The tokenized ID list was created here, so it is released here.
         NOTE(review): the shallow copy taken in the uvector case is left
         alone because ownership of that storage cannot be told from this
         function - confirm with the caller. */
      grn_obj_unlink(ctx, &record);
    }
  } else {
    grn_id tid;
    grn_obj value;
    tid = GRN_RECORD_VALUE(newvalue);
    GRN_UINT32_INIT(&value, 0);
    grn_obj_get_value(ctx, column, tid, &value);
    if (GRN_UINT32_VALUE(&value)) {
      GRN_PLUGIN_LOG(ctx, GRN_LOG_INFO,
                     "[tag-synonym] "
                     "changed: tid %d -> %d", tid, GRN_UINT32_VALUE(&value));
      tid = GRN_UINT32_VALUE(&value);
      GRN_BULK_REWIND(newvalue);
      GRN_RECORD_SET(ctx, newvalue, tid);
    }
    grn_obj_unlink(ctx, &value);
  }

  return NULL;
}
Ejemplo n.º 16
0
/*
 * Prepares a MeCab-based tokenizer for the string popped from the ctx stack.
 *
 * The input is normalized according to the table's flags and encoding, then
 * handed to the shared MeCab instance. The output buffer starts at
 * 2 * len + 1 bytes and is doubled, for at most 10 attempts, for as long as
 * MeCab reports "output buffer overflow".
 *
 * Returns GRN_SUCCESS, GRN_INVALID_ARGUMENT when no string is on the stack,
 * GRN_TOKENIZER_ERROR when MeCab or normalization fails, or ctx->rc when an
 * allocation fails. user_data->ptr is written only on success; on every
 * failure path the partially built tokenizer and its normalized string are
 * released, so the caller is never left with a dangling pointer.
 */
static grn_rc
mecab_init(grn_ctx *ctx, grn_obj *table, grn_proc_data *user_data)
{
  grn_obj *str;
  int nflags = 0;
  char *buf, *s, *p;
  char mecab_err[256];
  grn_obj_flags table_flags;
  grn_mecab_tokenizer *token;
  unsigned int bufsize, maxtrial = 10, len;
  if (!(str = grn_ctx_pop(ctx))) { return GRN_INVALID_ARGUMENT; }
  SOLE_MECAB_CONFIRM;
  if (!sole_mecab) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "mecab_new failed on grn_mecab_init");
    return GRN_TOKENIZER_ERROR;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_mecab_tokenizer)))) { return ctx->rc; }
  token->mecab = sole_mecab;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  /* Normalize the input only when the table itself normalizes its keys. */
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_str_open failed at grn_token_open");
    GRN_FREE(token);
    return GRN_TOKENIZER_ERROR;
  }
  len = token->nstr->norm_blen;
  /* strncpy() below copies at most sizeof - 1 bytes, so this terminator
     is never overwritten. */
  mecab_err[0] = '\0';
  mecab_err[sizeof(mecab_err) - 1] = '\0';
  s = NULL;
  for (bufsize = len * 2 + 1; maxtrial; bufsize *= 2, maxtrial--) {
    if(!(buf = GRN_MALLOC(bufsize + 1))) {
      /* Remember the allocation error before cleanup can touch ctx->rc. */
      grn_rc rc = ctx->rc;
      GRN_LOG(ctx, GRN_LOG_ALERT, "buffer allocation on mecab_init failed !");
      grn_str_close(ctx, token->nstr);
      GRN_FREE(token);
      return rc;
    }
    /* The MeCab instance is shared, so both the call and the retrieval of
       its error text happen under the lock. */
    MUTEX_LOCK(sole_mecab_lock);
    s = mecab_sparse_tostr3(token->mecab, token->nstr->norm, len, buf, bufsize);
    if (!s) {
      strncpy(mecab_err, mecab_strerror(token->mecab), sizeof(mecab_err) - 1);
    }
    MUTEX_UNLOCK(sole_mecab_lock);
    if (s) { break; }
    GRN_FREE(buf);
    /* Only an overflow is worth retrying with a larger buffer. */
    if (strstr(mecab_err, "output buffer overflow") == NULL) { break; }
  }
  if (!s) {
    /* buf has already been released inside the loop at this point. */
    GRN_LOG(ctx, GRN_LOG_ALERT, "mecab_sparse_tostr failed len=%d bufsize=%d err=%s",
            len, bufsize, mecab_err);
    grn_str_close(ctx, token->nstr);
    GRN_FREE(token);
    return GRN_TOKENIZER_ERROR;
  }
  /* Certain versions of mecab return a trailing lf or spaces. */
  for (p = buf + strlen(buf) - 1;
       buf <= p && (*p == '\n' || isspace(*(unsigned char *)p));
       p--) { *p = '\0'; }
  token->buf = (unsigned char *)buf;
  token->next = (unsigned char *)buf;
  token->end = (unsigned char *)buf + strlen(buf);
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  /* Publish the state only once it is fully initialized. */
  user_data->ptr = token;
  return GRN_SUCCESS;
}
Ejemplo n.º 17
0
grn_id
grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor)
{
  int status;
  grn_id tid = GRN_ID_NIL;
  grn_obj *table = token_cursor->table;
  grn_obj *tokenizer = token_cursor->tokenizer;
  while (token_cursor->status != GRN_TOKEN_CURSOR_DONE) {
    if (tokenizer) {
      grn_obj *curr_, *stat_;
      ((grn_proc *)tokenizer)->funcs[PROC_NEXT](ctx, 1, &table, &token_cursor->pctx.user_data);
      stat_ = grn_ctx_pop(ctx);
      curr_ = grn_ctx_pop(ctx);
      status = grn_token_cursor_next_apply_token_filters(ctx, token_cursor,
                                                         curr_, stat_);
      token_cursor->status =
        ((status & GRN_TOKEN_LAST) ||
         (token_cursor->mode == GRN_TOKENIZE_GET &&
          (status & GRN_TOKEN_REACH_END)))
        ? GRN_TOKEN_CURSOR_DONE : GRN_TOKEN_CURSOR_DOING;
      token_cursor->force_prefix = GRN_FALSE;
#define SKIP_FLAGS \
      (GRN_TOKEN_SKIP | GRN_TOKEN_SKIP_WITH_POSITION)
      if (status & SKIP_FLAGS) {
        if (status & GRN_TOKEN_SKIP) {
          token_cursor->pos++;
        }
        if (token_cursor->status == GRN_TOKEN_CURSOR_DONE && tid == GRN_ID_NIL) {
          token_cursor->status = GRN_TOKEN_CURSOR_DONE_SKIP;
          break;
        } else {
          continue;
        }
      }
#undef SKIP_FLAGS
      if (status & GRN_TOKEN_FORCE_PREFIX) {
        token_cursor->force_prefix = GRN_TRUE;
      }
      if (token_cursor->curr_size == 0) {
        if (token_cursor->status != GRN_TOKEN_CURSOR_DONE) {
          char tokenizer_name[GRN_TABLE_MAX_KEY_SIZE];
          int tokenizer_name_length;
          tokenizer_name_length =
            grn_obj_name(ctx, token_cursor->tokenizer,
                         tokenizer_name, GRN_TABLE_MAX_KEY_SIZE);
          GRN_LOG(ctx, GRN_WARN,
                  "[token_next] ignore an empty token: <%.*s>: <%.*s>",
                  tokenizer_name_length, tokenizer_name,
                  token_cursor->orig_blen, token_cursor->orig);
        }
        continue;
      }
      if (token_cursor->curr_size > GRN_TABLE_MAX_KEY_SIZE) {
        GRN_LOG(ctx, GRN_WARN,
                "[token_next] ignore too long token. "
                "Token must be less than or equal to %d: <%d>(<%.*s>)",
                GRN_TABLE_MAX_KEY_SIZE,
                token_cursor->curr_size,
                token_cursor->curr_size, token_cursor->curr);
        continue;
      }
      if (status & GRN_TOKEN_UNMATURED) {
        if (status & GRN_TOKEN_OVERLAP) {
          if (token_cursor->mode == GRN_TOKENIZE_GET) {
            token_cursor->pos++;
            continue;
          }
        } else {
          if (status & GRN_TOKEN_REACH_END) {
            token_cursor->force_prefix = GRN_TRUE;
          }
        }
      }
    } else {
      token_cursor->status = GRN_TOKEN_CURSOR_DONE;
    }
    if (token_cursor->mode == GRN_TOKENIZE_ADD) {
      switch (table->header.type) {
      case GRN_TABLE_PAT_KEY :
        if (grn_io_lock(ctx, ((grn_pat *)table)->io, grn_lock_timeout)) {
          tid = GRN_ID_NIL;
        } else {
          tid = grn_pat_add(ctx, (grn_pat *)table, token_cursor->curr, token_cursor->curr_size,
                            NULL, NULL);
          grn_io_unlock(((grn_pat *)table)->io);
        }
        break;
      case GRN_TABLE_DAT_KEY :
        if (grn_io_lock(ctx, ((grn_dat *)table)->io, grn_lock_timeout)) {
          tid = GRN_ID_NIL;
        } else {
          tid = grn_dat_add(ctx, (grn_dat *)table, token_cursor->curr, token_cursor->curr_size,
                            NULL, NULL);
          grn_io_unlock(((grn_dat *)table)->io);
        }
        break;
      case GRN_TABLE_HASH_KEY :
        if (grn_io_lock(ctx, ((grn_hash *)table)->io, grn_lock_timeout)) {
          tid = GRN_ID_NIL;
        } else {
          tid = grn_hash_add(ctx, (grn_hash *)table, token_cursor->curr, token_cursor->curr_size,
                             NULL, NULL);
          grn_io_unlock(((grn_hash *)table)->io);
        }
        break;
      case GRN_TABLE_NO_KEY :
        if (token_cursor->curr_size == sizeof(grn_id)) {
          tid = *((grn_id *)token_cursor->curr);
        } else {
          tid = GRN_ID_NIL;
        }
        break;
      }
    } else if (token_cursor->mode != GRN_TOKENIZE_ONLY) {
      switch (table->header.type) {
      case GRN_TABLE_PAT_KEY :
        tid = grn_pat_get(ctx, (grn_pat *)table, token_cursor->curr, token_cursor->curr_size, NULL);
        break;
      case GRN_TABLE_DAT_KEY :
        tid = grn_dat_get(ctx, (grn_dat *)table, token_cursor->curr, token_cursor->curr_size, NULL);
        break;
      case GRN_TABLE_HASH_KEY :
        tid = grn_hash_get(ctx, (grn_hash *)table, token_cursor->curr, token_cursor->curr_size, NULL);
        break;
      case GRN_TABLE_NO_KEY :
        if (token_cursor->curr_size == sizeof(grn_id)) {
          tid = *((grn_id *)token_cursor->curr);
        } else {
          tid = GRN_ID_NIL;
        }
        break;
      }
    }
    if (token_cursor->mode != GRN_TOKENIZE_ONLY &&
        tid == GRN_ID_NIL && token_cursor->status != GRN_TOKEN_CURSOR_DONE) {
      token_cursor->status = GRN_TOKEN_CURSOR_NOT_FOUND;
    }
    token_cursor->pos++;
    break;
  }
  return tid;
}