Exemplo n.º 1
0
/*
 * Updates the inverted index entry of (record_id, section), replacing the
 * text loaded from the fixture named old_name with the text loaded from the
 * fixture named new_name.  Passing NULL for either name leaves the matching
 * text object as initialized, with no data attached.
 */
static void
update_data(grn_id record_id, unsigned int section,
            const gchar *old_name, const gchar *new_name)
{
    grn_obj before, after;

    GRN_TEXT_INIT(&before, GRN_OBJ_DO_SHALLOW_COPY);
    if (old_name) {
        const gchar *text = cut_get_fixture_data_string(old_name, NULL);
        GRN_TEXT_SET_REF(&before, text, strlen(text));
    }

    GRN_TEXT_INIT(&after, GRN_OBJ_DO_SHALLOW_COPY);
    if (new_name) {
        const gchar *text = cut_get_fixture_data_string(new_name, NULL);
        GRN_TEXT_SET_REF(&after, text, strlen(text));
    }

    grn_ii_column_update(context, inverted_index, record_id, section,
                         &before, &after, NULL);

    grn_obj_close(context, &before);
    grn_obj_close(context, &after);
}
Exemplo n.º 2
0
/*
 * Produces the next whitespace-delimited token.
 *
 * Scanning starts at token->next and stops at the first whitespace
 * character, at token->end, or at a byte sequence for which grn_charlen_()
 * reports length 0.  The token text and its status are pushed onto the ctx
 * stack (text first, status second).  The status is GRN_TOKEN_LAST when the
 * scan reached token->end, otherwise 0.
 *
 * token->next is only advanced when the scan stops early: past the run of
 * whitespace, or to token->end on an invalid character.
 */
static grn_rc
mecab_next(grn_ctx *ctx, grn_obj *table, grn_proc_data *user_data)
{
  grn_mecab_tokenizer *token = user_data->ptr;
  const unsigned char *start = token->next;
  const unsigned char *end = token->end;
  const unsigned char *cursor = start;

  while (cursor < end) {
    size_t char_len = grn_charlen_(ctx, (char *)cursor, (char *)end,
                                   token->encoding);
    if (char_len == 0) {
      /* Undecodable character: resume from the end next time. */
      token->next = (unsigned char *)end;
      break;
    }
    if (grn_isspace((const char *)cursor, token->encoding)) {
      /* Skip the whole whitespace run so the next token starts after it. */
      const unsigned char *after_spaces = cursor;
      size_t space_len;
      while ((space_len = grn_isspace((const char *)after_spaces,
                                      token->encoding)) != 0) {
        after_spaces += space_len;
      }
      token->next = (unsigned char *)after_spaces;
      break;
    }
    cursor += char_len;
  }

  GRN_TEXT_SET_REF(&token->curr_, start, cursor - start);
  GRN_UINT32_SET(ctx, &token->stat_, cursor == end ? GRN_TOKEN_LAST : 0);
  grn_ctx_push(ctx, &token->curr_);
  grn_ctx_push(ctx, &token->stat_);
  return GRN_SUCCESS;
}
Exemplo n.º 3
0
/*
 * @overload [](id)
 *   @param id [Integer, Groonga::Record] The record ID for the
 *     column value.
 *
 *   @return [Object] The value for the record ID.
 */
static VALUE
rb_grn_column_cache_array_reference (VALUE self, VALUE rb_id)
{
    RbGrnColumnCache *cache;
    grn_id record_id;
    void *raw;
    size_t raw_size = 0;

    TypedData_Get_Struct(self, RbGrnColumnCache, &data_type, cache);

    /* Without an underlying column cache there is nothing to look up. */
    if (!cache->column_cache)
        return Qnil;

    record_id = rb_grn_id_from_ruby_object(rb_id,
                                           cache->context,
                                           cache->table,
                                           self);

    /* Fetch the raw bytes, then check the context for errors. */
    raw = grn_column_cache_ref(cache->context,
                               cache->column_cache,
                               record_id,
                               &raw_size);
    rb_grn_context_check(cache->context, self);

    /* Point the reusable buffer at the bytes and convert it to a Ruby
     * value according to the column's range. */
    GRN_TEXT_SET_REF(&(cache->buffer), raw, raw_size);
    return GRNBULK2RVAL(cache->context,
                        &(cache->buffer),
                        cache->range,
                        self);
}
Exemplo n.º 4
0
/*
 * Returns tokens one by one.
 *
 * Each call scans from token->next up to the first whitespace character,
 * token->end, or a byte sequence for which grn_charlen_() reports length 0.
 * The token text and its status are pushed onto the ctx stack (text first,
 * status second); the status is GRN_TOKEN_LAST when the scan reached
 * token->end, otherwise 0.  nargs and args are not used.  Always returns
 * NULL.
 */
static grn_obj *
mecab_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_mecab_tokenizer *token = user_data->ptr;
  char *start = token->next;
  char *end = token->end;
  char *cursor = start;

  while (cursor < end) {
    size_t char_len = grn_charlen_(ctx, cursor, end, token->encoding);
    if (char_len == 0) {
      /* Undecodable character: resume from the end next time. */
      token->next = end;
      break;
    }
    if (grn_isspace(cursor, token->encoding)) {
      /* Skip the whole whitespace run so the next token starts after it. */
      char *after_spaces = cursor;
      size_t space_len;
      while ((space_len = grn_isspace(after_spaces, token->encoding)) != 0) {
        after_spaces += space_len;
      }
      token->next = after_spaces;
      break;
    }
    cursor += char_len;
  }

  GRN_TEXT_SET_REF(&token->curr_, start, cursor - start);
  GRN_UINT32_SET(ctx, &token->stat_, cursor == end ? GRN_TOKEN_LAST : 0);
  grn_ctx_push(ctx, &token->curr_);
  grn_ctx_push(ctx, &token->stat_);
  return NULL;
}
Exemplo n.º 5
0
/*
 * Read/write round trip executed once per worker.
 *
 * `data` carries the worker index i (packed with GINT_TO_POINTER by the
 * caller, unpacked here with GPOINTER_TO_INT).  The worker initializes
 * contexts[i], opens the table whose path comes from the
 * GRN_TEST_ENV_TABLE_PATH environment variable, checks that key i is not
 * present yet, adds it, stores a text value, reads the value back and
 * compares it, then closes the table and finalizes the context.
 *
 * The optional GRN_TEST_ENV_PROCESS_NUMBER environment variable is only used
 * to label assertion messages.
 */
void
test_read_write(gconstpointer *data)
{
  gint i;
  int added;
  grn_ctx *context;
  grn_obj *table;
  const gchar *path;
  const gchar *value_string;
  gint process_number = 0;
  const gchar *process_number_string;
  const gchar table_name[] = "performance-read-write";
  grn_obj value;
  grn_obj *retrieved_value;
  grn_id id;
  grn_rc rc;

  i = GPOINTER_TO_INT(data);
  process_number_string = g_getenv(GRN_TEST_ENV_PROCESS_NUMBER);
  if (process_number_string)
    process_number = atoi(process_number_string);

  rc = grn_ctx_init(&contexts[i], GRN_CTX_USE_QL);
  grn_test_assert(rc, cut_set_message("context: %d (%d)", i, process_number));
  context = &contexts[i];

  path = g_getenv(GRN_TEST_ENV_TABLE_PATH);
  cut_assert_not_null(path);
  tables[i] = grn_table_open(context, table_name, strlen(table_name),
                             path);
  cut_assert_not_null(tables[i],
                      cut_message("table: %d (%d)", i, process_number));
  table = tables[i];

  /* The key must not exist before this worker adds it. */
  grn_test_assert_nil(grn_table_get(context, table, &i, sizeof(grn_id)),
                      cut_message("lookup - fail: (%d:%d)", i, process_number));

  value_string = cut_take_printf("value: (%d:%d)", i, process_number);
  id = grn_table_add(context, table, &i, sizeof(grn_id), &added);
  grn_test_assert_not_nil(id);
  cut_assert_equal_int(1, added);

  GRN_TEXT_INIT(&value, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_TEXT_SET_REF(&value, value_string, strlen(value_string));
  grn_obj_set_value(context, table, id, &value, GRN_OBJ_SET);

  /* Verify the read itself succeeded before touching the returned object;
   * the original re-checked `id`, which had already been asserted above. */
  retrieved_value = grn_obj_get_value(context, table, id, NULL);
  cut_assert_not_null(
    retrieved_value,
    cut_message("lookup - success: (%d:%d)", i, process_number));
  /* NUL-terminate the retrieved bytes so they can be compared as a string. */
  GRN_TEXT_PUTC(context, retrieved_value, '\0');
  cut_assert_equal_string(value_string, GRN_BULK_HEAD(retrieved_value));
  /* Passing NULL as the output object makes grn_obj_get_value() hand back a
   * new object; release it here. */
  grn_test_assert(grn_obj_close(context, retrieved_value));

  tables[i] = NULL;
  grn_test_assert(grn_obj_close(context, table));

  grn_test_assert(grn_ctx_fin(context));
}
Exemplo n.º 6
0
/*
 * Emits the next fixed-size unit of the uvector being tokenized.
 *
 * If a whole unit (token->unit bytes) does not fit between token->curr and
 * token->tail, an empty token with status GRN_TOKEN_LAST is produced and
 * token->curr is left untouched.  Otherwise the unit at token->curr is
 * produced, token->curr advances by one unit, and the status is
 * GRN_TOKEN_LAST only when the new position equals token->tail.
 *
 * The token text and the status are pushed onto the ctx stack, in that
 * order.  nargs and args are not used.  Always returns NULL.
 */
static grn_obj *
uvector_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_uvector_tokenizer_info *info = user_data->ptr;
  byte *unit_end = info->curr + info->unit;
  unsigned int status;

  if (unit_end > info->tail) {
    GRN_TEXT_SET_REF(&info->curr_, info->curr, 0);
    status = GRN_TOKEN_LAST;
  } else {
    GRN_TEXT_SET_REF(&info->curr_, info->curr, info->unit);
    info->curr = unit_end;
    status = (unit_end == info->tail) ? GRN_TOKEN_LAST : 0;
  }
  GRN_UINT32_SET(ctx, &info->stat_, status);

  grn_ctx_push(ctx, &info->curr_);
  grn_ctx_push(ctx, &info->stat_);
  return NULL;
}
Exemplo n.º 7
0
/*
 * Publishes one token to the caller through the ctx stack.
 *
 * token->str is pointed at the str_length bytes starting at str_ptr (set
 * with GRN_TEXT_SET_REF), token->status is set to `status`, and both objects
 * are pushed onto the ctx stack: the text first, the status second.
 */
void
grn_tokenizer_token_push(grn_ctx *ctx, grn_tokenizer_token *token,
                         const char *str_ptr, unsigned int str_length,
                         grn_token_status status)
{
  /* Token text. */
  GRN_TEXT_SET_REF(&token->str, str_ptr, str_length);
  grn_ctx_push(ctx, &token->str);

  /* Token status; must be pushed after the text. */
  GRN_UINT32_SET(ctx, &token->status, status);
  grn_ctx_push(ctx, &token->status);
}
Exemplo n.º 8
0
/*
 * Searches index->inv for the string_len bytes at `string`.
 *
 * A new table is created with grn_table_create() (GRN_OBJ_TABLE_HASH_KEY,
 * key type index->keys) and filled by grn_obj_search() using GRN_SEL_OR.
 * Returns that table, or NULL when the table could not be created or the
 * search reported an error (the table is closed in that case).
 */
grn_obj *
grn_index_sel(grn_ctx *ctx, grn_index *index,
              const char *string, unsigned int string_len)
{
  grn_obj query;
  grn_obj *result;

  GRN_TEXT_INIT(&query, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_TEXT_SET_REF(&query, string, string_len);

  result = grn_table_create(ctx, NULL, 0, NULL, GRN_OBJ_TABLE_HASH_KEY,
                            index->keys, 0);
  if (!result) {
    return NULL;
  }

  if (grn_obj_search(ctx, index->inv, &query, result, GRN_SEL_OR, NULL)) {
    /* Search failed: do not hand a half-filled table to the caller. */
    grn_obj_close(ctx, result);
    return NULL;
  }
  return result;
}
Exemplo n.º 9
0
/*
 * Replaces the indexed text of the record identified by `key`.
 *
 * The key (a NUL-terminated string) is added to index->keys with
 * grn_table_add(); when that yields a record id, section 1 of index->inv is
 * updated from oldvalue to newvalue.  Returns ctx->rc as it stands when the
 * function finishes.
 */
grn_rc
grn_index_upd(grn_ctx *ctx, grn_index *index, const char *key,
              const char *oldvalue, unsigned int oldvalue_len,
              const char *newvalue, unsigned int newvalue_len)
{
  grn_obj old_text, new_text;
  grn_id rid;

  rid = grn_table_add(ctx, index->keys, key, strlen(key), NULL);
  if (!rid) {
    return ctx->rc;
  }

  GRN_TEXT_INIT(&old_text, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_TEXT_INIT(&new_text, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_TEXT_SET_REF(&old_text, oldvalue, oldvalue_len);
  GRN_TEXT_SET_REF(&new_text, newvalue, newvalue_len);

  grn_column_index_update(ctx, index->inv, rid, 1, &old_text, &new_text);

  grn_obj_close(ctx, &old_text);
  grn_obj_close(ctx, &new_text);
  return ctx->rc;
}
Exemplo n.º 10
0
Arquivo: token.c Projeto: mooz/groonga
/*
 * Allocates and initializes a token cursor over the str_len bytes at `str`.
 *
 * The encoding and tokenizer are taken from `table` through
 * grn_table_get_info().  When the table has a tokenizer, its PROC_INIT
 * function is invoked with the source text pushed onto the ctx stack.
 *
 * Returns the new cursor, or NULL when the table info lookup fails, the
 * allocation fails, or ctx->rc is non-zero once initialization is done (the
 * cursor is freed in that case).
 */
grn_token *
grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len,
               grn_token_mode mode)
{
  grn_encoding encoding;
  grn_obj *tokenizer;
  grn_token *token;

  if (grn_table_get_info(ctx, table, NULL, &encoding, &tokenizer)) {
    return NULL;
  }
  token = GRN_MALLOC(sizeof(grn_token));
  if (!token) {
    return NULL;
  }

  /* What is being tokenized, and how. */
  token->table = table;
  token->mode = mode;
  token->encoding = encoding;
  token->tokenizer = tokenizer;
  token->orig = str;
  token->orig_blen = str_len;

  /* Cursor state before the first token. */
  token->curr = NULL;
  token->curr_size = 0;
  token->pos = -1;
  token->status = grn_token_doing;
  token->force_prefix = 0;

  if (tokenizer) {
    grn_proc *proc = (grn_proc *)tokenizer;
    grn_obj source;

    token->pctx.caller = NULL;
    token->pctx.user_data.ptr = NULL;
    token->pctx.proc = proc;
    token->pctx.hooks = NULL;
    token->pctx.currh = NULL;
    token->pctx.phase = PROC_INIT;

    /* The source text is handed to the tokenizer through the ctx stack. */
    GRN_TEXT_INIT(&source, GRN_OBJ_DO_SHALLOW_COPY);
    GRN_TEXT_SET_REF(&source, str, str_len);
    grn_ctx_push(ctx, &source);
    proc->funcs[PROC_INIT](ctx, 1, &table, &token->pctx.user_data);
    grn_obj_close(ctx, &source);
  }

  if (!ctx->rc) {
    return token;
  }
  GRN_FREE(token);
  return NULL;
}
Exemplo n.º 11
0
/*
 * Produces the next delimiter-separated token.
 *
 * Scanning starts at token->next and proceeds character by character until
 * the delimiter (token->delimiter, token->delimiter_len bytes) is found,
 * token->end is reached, or grn_charlen_() reports length 0.  The bytes
 * scanned so far form the token; on a delimiter hit token->next is moved
 * just past the delimiter.
 *
 * The token text and the status (GRN_TOKEN_LAST when the scan reached
 * token->end, otherwise 0) are pushed onto the ctx stack, in that order.
 * nargs and args are not used.  Always returns NULL.
 */
static grn_obj *
delimited_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_delimited_tokenizer *token = user_data->ptr;
  const unsigned char *start = token->next;
  const unsigned char *end = token->end;
  const unsigned char *cursor = start;

  while (cursor < end) {
    size_t char_len = grn_charlen_(ctx, (char *)cursor, (char *)end,
                                   token->encoding);
    if (char_len == 0) {
      /* Undecodable character: resume from the end next time. */
      token->next = (unsigned char *)end;
      break;
    }
    /* Only compare when the whole delimiter fits in the remaining bytes. */
    if (cursor + token->delimiter_len <= end &&
        memcmp(cursor, token->delimiter, token->delimiter_len) == 0) {
      token->next = cursor + token->delimiter_len;
      break;
    }
    cursor += char_len;
  }

  GRN_TEXT_SET_REF(&token->curr_, start, cursor - start);
  GRN_UINT32_SET(ctx, &token->stat_, cursor == end ? GRN_TOKEN_LAST : 0);
  grn_ctx_push(ctx, &token->curr_);
  grn_ctx_push(ctx, &token->stat_);
  return NULL;
}
Exemplo n.º 12
0
/*
 * Emits the next n-gram token from the tokenizer state in user_data->ptr.
 *
 * The token covers the byte range [p, r), where p is token->next on entry.
 * Its text (set with GRN_TEXT_SET_REF) and a status bit set are pushed onto
 * the ctx stack, in that order.  nargs and args are not used.  Apart from
 * the early return inside the PRE_DEFINED_UNSPLIT_WORDS section, the
 * function always returns NULL.
 *
 * Status bits set here:
 *   GRN_TOKEN_OVERLAP    - token->overlap was set on entry (n-gram branch only)
 *   GRN_TOKEN_UNMATURED  - fewer than token->ngram_unit characters were taken
 *   GRN_TOKEN_LAST       - nothing was consumed, or token->next equals the end
 *   GRN_TOKEN_REACH_END  - the token extends to the end of the input
 */
static grn_obj *
ngram_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  size_t cl;
  grn_ngram_tokenizer *token = user_data->ptr;
  const unsigned char *p = token->next, *r = p, *e = token->end;
  /* pos is the character position of this token: previous position plus the
     skip amount stored by the previous call. */
  int32_t len = 0, pos = token->pos + token->skip, status = 0;
  /* cp walks the per-character type table in step with r; NULL when the
     tokenizer has no ctypes table. */
  uint_least8_t *cp = token->ctypes ? token->ctypes + pos : NULL;
  /* Branches 1-3: when the matching uni_* flag is set and the current
     character has that type, a whole run of same-typed characters becomes a
     single token. */
  if (cp && token->uni_alpha && GRN_STR_CTYPE(*cp) == grn_str_alpha) {
    while ((cl = grn_charlen_(ctx, (char *)r, (char *)e, token->encoding))) {
      len++;
      r += cl;
      /* Stop after a character flagged as blank, or when the next character
         is no longer alphabetic. */
      if (/* !token->ignore_blank && */ GRN_STR_ISBLANK(*cp)) { break; }
      if (GRN_STR_CTYPE(*++cp) != grn_str_alpha) { break; }
    }
    token->next = r;
    token->overlap = 0;
  } else if (cp && token->uni_digit && GRN_STR_CTYPE(*cp) == grn_str_digit) {
    while ((cl = grn_charlen_(ctx, (char *)r, (char *)e, token->encoding))) {
      len++;
      r += cl;
      if (/* !token->ignore_blank && */ GRN_STR_ISBLANK(*cp)) { break; }
      if (GRN_STR_CTYPE(*++cp) != grn_str_digit) { break; }
    }
    token->next = r;
    token->overlap = 0;
  } else if (cp && token->uni_symbol && GRN_STR_CTYPE(*cp) == grn_str_symbol) {
    while ((cl = grn_charlen_(ctx, (char *)r, (char *)e, token->encoding))) {
      len++;
      r += cl;
      /* Unlike the alpha/digit runs, the blank check here honors
         token->ignore_blank. */
      if (!token->ignore_blank && GRN_STR_ISBLANK(*cp)) { break; }
      if (GRN_STR_CTYPE(*++cp) != grn_str_symbol) { break; }
    }
    token->next = r;
    token->overlap = 0;
  } else {
    /* NOTE(review): the section below is compiled only when
       PRE_DEFINED_UNSPLIT_WORDS is defined.  It uses `tid` and `sym`, which
       are not declared in this function, and a `continue` that is not
       inside any loop, so it does not look compilable as written. */
#ifdef PRE_DEFINED_UNSPLIT_WORDS
    const unsigned char *key = NULL;
    // todo : grn_pat_lcp_search
    if ((tid = grn_sym_common_prefix_search(sym, p))) {
      if (!(key = _grn_sym_key(sym, tid))) {
        token->status = grn_token_not_found;
        return NULL;
      }
      len = grn_str_len(key, token->encoding, NULL);
    }
    r = p + grn_charlen_(ctx, p, e, token->encoding);
    if (tid && (len > 1 || r == p)) {
      if (r != p && pos + len - 1 <= token->tail) { continue; }
      p += strlen(key);
      if (!*p && !token->add) { token->status = grn_token_done; }
    }
#endif /* PRE_DEFINED_UNSPLIT_WORDS */
    /* Regular n-gram: take one character, then extend up to
       token->ngram_unit characters. */
    if ((cl = grn_charlen_(ctx, (char *)r, (char *)e, token->encoding))) {
      len++;
      r += cl;
      /* The following token starts one character after this one. */
      token->next = r;
      while (len < token->ngram_unit &&
             (cl = grn_charlen_(ctx, (char *)r, (char *)e, token->encoding))) {
        if (cp) {
          /* Do not extend across a blank (unless blanks are ignored) or into
             a character that one of the uni_* flags groups into a run. */
          if (!token->ignore_blank && GRN_STR_ISBLANK(*cp)) { break; }
          cp++;
          if ((token->uni_alpha && GRN_STR_CTYPE(*cp) == grn_str_alpha) ||
              (token->uni_digit && GRN_STR_CTYPE(*cp) == grn_str_digit) ||
              (token->uni_symbol && GRN_STR_CTYPE(*cp) == grn_str_symbol)) { break; }
        }
        len++;
        r += cl;
      }
      /* token->overlap still holds the value left by the previous call at
         this point; it is refreshed for the next call just below. */
      if (token->overlap) { status |= GRN_TOKEN_OVERLAP; }
      if (len < token->ngram_unit) { status |= GRN_TOKEN_UNMATURED; }
      token->overlap = (len > 1) ? 1 : 0;
    }
  }
  /* Record position, length in characters, and position of the last
     character of this token. */
  token->pos = pos;
  token->len = len;
  token->tail = pos + len - 1;
  if (p == r || token->next == e) {
    /* Nothing consumed, or the next start is already the end of input. */
    token->skip = 0;
    status |= GRN_TOKEN_LAST;
  } else {
    /* Overlapping n-grams advance one character; otherwise skip the whole
       token. */
    token->skip = token->overlap ? 1 : len;
  }
  if (r == e) { status |= GRN_TOKEN_REACH_END; }
  GRN_TEXT_SET_REF(&token->curr_, p, r - p);
  GRN_UINT32_SET(ctx, &token->stat_, status);
  grn_ctx_push(ctx, &token->curr_);
  grn_ctx_push(ctx, &token->stat_);
  return NULL;
}
Exemplo n.º 13
0
/*
 * Allocates and initializes a token cursor over the str_len bytes at `str`.
 *
 * Flags, encoding, tokenizer and normalizer are read from `table` through
 * grn_table_get_info().  With a tokenizer, its PROC_INIT function is invoked
 * with the source text and `flags` pushed onto the ctx stack (text first).
 * Without one, the text is passed through grn_string_open_() with the
 * table's normalizer and the cursor points at the normalized result.
 *
 * Returns the new cursor, or NULL when the table info lookup fails, the
 * allocation fails, or ctx->rc is non-zero once initialization is done (the
 * cursor is released with grn_token_close() in that case).
 */
grn_token *
grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len,
               grn_token_mode mode, unsigned int flags)
{
  grn_obj_flags table_flags;
  grn_encoding encoding;
  grn_obj *tokenizer;
  grn_obj *normalizer;
  grn_token *token;

  if (grn_table_get_info(ctx, table, &table_flags, &encoding, &tokenizer,
                         &normalizer)) {
    return NULL;
  }
  token = GRN_MALLOC(sizeof(grn_token));
  if (!token) {
    return NULL;
  }

  /* What is being tokenized, and how. */
  token->table = table;
  token->mode = mode;
  token->encoding = encoding;
  token->tokenizer = tokenizer;
  token->orig = str;
  token->orig_blen = str_len;

  /* Cursor state before the first token. */
  token->curr = NULL;
  token->nstr = NULL;
  token->curr_size = 0;
  token->pos = -1;
  token->status = GRN_TOKEN_DOING;
  token->force_prefix = 0;

  if (!tokenizer) {
    /* No tokenizer: the whole normalized string is the current token. */
    token->nstr = grn_string_open_(ctx, str, str_len,
                                   normalizer, 0, token->encoding);
    if (!token->nstr) {
      ERR(GRN_TOKENIZER_ERROR, "grn_string_open failed at grn_token_open");
    } else {
      const char *normalized;
      grn_string_get_normalized(ctx, token->nstr,
                                &normalized, &(token->curr_size), NULL);
      token->curr = (const unsigned char *)normalized;
    }
  } else {
    grn_proc *proc = (grn_proc *)tokenizer;
    grn_obj source, flags_value;

    token->pctx.caller = NULL;
    token->pctx.user_data.ptr = NULL;
    token->pctx.proc = proc;
    token->pctx.hooks = NULL;
    token->pctx.currh = NULL;
    token->pctx.phase = PROC_INIT;

    /* Arguments reach the tokenizer through the ctx stack: text, then
     * flags. */
    GRN_TEXT_INIT(&source, GRN_OBJ_DO_SHALLOW_COPY);
    GRN_TEXT_SET_REF(&source, str, str_len);
    GRN_UINT32_INIT(&flags_value, 0);
    GRN_UINT32_SET(ctx, &flags_value, flags);
    grn_ctx_push(ctx, &source);
    grn_ctx_push(ctx, &flags_value);
    proc->funcs[PROC_INIT](ctx, 1, &table, &token->pctx.user_data);
    grn_obj_close(ctx, &flags_value);
    grn_obj_close(ctx, &source);
  }

  if (!ctx->rc) {
    return token;
  }
  grn_token_close(ctx, token);
  return NULL;
}
Exemplo n.º 14
0
/*
 * Command handler that installs the "tag_synonym" proc as a GRN_HOOK_SET
 * hook on a column.
 *
 * Reads the "table" and "column" command variables, resolves the column,
 * checks that its range object has a SYNONYM_COLUMN_NAME column, and then
 * adds the hook.  On success the number of GRN_HOOK_SET hooks now attached
 * to the column is written to the command output.
 *
 * Every failure is logged with GRN_PLUGIN_LOG at GRN_LOG_ERROR and produces
 * no output.  nargs and args are not used.  Always returns NULL.
 */
static grn_obj *
command_tag_synonym_add(grn_ctx *ctx, GNUC_UNUSED int nargs, GNUC_UNUSED grn_obj **args,
                        grn_user_data *user_data)
{
  grn_obj *var, *proc, *table, *column;
  unsigned int nhooks = 0;
  char *table_name = NULL;
  unsigned int table_len = 0;
  char *column_name = NULL;
  unsigned int column_len = 0;

  var = grn_plugin_proc_get_var(ctx, user_data, "table", -1);
  if (var && GRN_TEXT_LEN(var) != 0) {
    table_name = GRN_TEXT_VALUE(var);
    table_len = GRN_TEXT_LEN(var);
  }
  var = grn_plugin_proc_get_var(ctx, user_data, "column", -1);
  if (var && GRN_TEXT_LEN(var) != 0) {
    column_name = GRN_TEXT_VALUE(var);
    column_len = GRN_TEXT_LEN(var);
  }

  /* Report missing arguments and unknown objects explicitly instead of
     passing NULL further down, where the failure would surface as a
     misleading "must be reference type" error. */
  if (!table_name || !column_name) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                   "[tag-synonym] "
                   "table and column must be specified");
    return NULL;
  }

  table = grn_ctx_get(ctx, table_name, table_len);
  if (!table) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                   "[tag-synonym] "
                   "couldn't find table: <%.*s>",
                   (int)table_len, table_name);
    return NULL;
  }

  column = grn_obj_column(ctx, table, column_name, column_len);
  if (!column) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                   "[tag-synonym] "
                   "couldn't find column: <%.*s.%.*s>",
                   (int)table_len, table_name,
                   (int)column_len, column_name);
    return NULL;
  }

  {
    grn_obj *range;
    grn_obj *col;
    grn_id range_id;

    range_id = grn_obj_get_range(ctx, column);
    range = grn_ctx_at(ctx, range_id);

    if (!range) {
      GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                     "[tag-synonym] "
                     "hooked column must be reference type");
      return NULL;
    }

    /* The lookup is only an existence check; col is not used afterwards. */
    col = grn_obj_column(ctx,
                         range,
                         SYNONYM_COLUMN_NAME,
                         SYNONYM_COLUMN_NAME_LEN);
    if (!col) {
      GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                     "[tag-synonym] "
                     "couldn't open synonym column");
      return NULL;
    }
  }

  proc = grn_ctx_get(ctx, "tag_synonym", -1);
  if (!proc) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                   "[tag-synonym] "
                   "couldn't find tag_synonym proc");
    return NULL;
  }

  {
    grn_obj data;
    default_set_value_hook_data hook_data = { grn_obj_id(ctx, proc), 0 };
    /* The hook payload is the raw bytes of hook_data, passed by reference. */
    GRN_TEXT_INIT(&data, GRN_OBJ_DO_SHALLOW_COPY);
    GRN_TEXT_SET_REF(&data, &hook_data, sizeof(hook_data));
    grn_obj_add_hook(ctx, column, GRN_HOOK_SET, 0, proc, &data);
    grn_obj_unlink(ctx, &data);
  }

  nhooks = grn_obj_get_nhooks(ctx, column, GRN_HOOK_SET);
  grn_ctx_output_int32(ctx, nhooks);

  return NULL;
}