/*
 * Updates the inverted index entry for (record_id, section).
 *
 * The previous and the new text are loaded from the fixture files named by
 * `old_name` and `new_name`.  When a name is NULL the corresponding value is
 * passed to the index as an empty text.
 */
static void
update_data(grn_id record_id, unsigned int section,
            const gchar *old_name, const gchar *new_name)
{
  grn_obj before;
  grn_obj after;

  /* Both values only reference fixture memory; nothing is copied. */
  GRN_TEXT_INIT(&before, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_TEXT_INIT(&after, GRN_OBJ_DO_SHALLOW_COPY);

  if (old_name) {
    const gchar *content = cut_get_fixture_data_string(old_name, NULL);
    GRN_TEXT_SET_REF(&before, content, strlen(content));
  }

  if (new_name) {
    const gchar *content = cut_get_fixture_data_string(new_name, NULL);
    GRN_TEXT_SET_REF(&after, content, strlen(content));
  }

  grn_ii_column_update(context, inverted_index, record_id, section,
                       &before, &after, NULL);

  grn_obj_close(context, &before);
  grn_obj_close(context, &after);
}
/*
 * Produces the next token from the tokenizer state in `user_data->ptr`.
 *
 * Scanning starts at `token->next` and stops at the first whitespace
 * character, at a byte sequence `grn_charlen_()` cannot measure, or at
 * `token->end`.  The bytes scanned so far are pushed onto the context stack
 * as the token text, followed by a status value that is GRN_TOKEN_LAST when
 * the scan reached `token->end` and 0 otherwise.
 *
 * `table` is not used.  Always returns GRN_SUCCESS.
 */
static grn_rc
mecab_next(grn_ctx *ctx, grn_obj *table, grn_proc_data *user_data)
{
  size_t cl;
  grn_mecab_tokenizer *token = user_data->ptr;
  const unsigned char *p = token->next, *r;
  const unsigned char *e = token->end;

  for (r = p; r < e; r += cl) {
    if (!(cl = grn_charlen_(ctx, (char *)r, (char *)e, token->encoding))) {
      /* Unmeasurable character: give up on the rest of the input. */
      token->next = (unsigned char *)e;
      break;
    }
    if (grn_isspace((const char *)r, token->encoding)) {
      const unsigned char *q = r;
      /*
       * Skip the whole run of whitespace so the next token starts on a
       * non-space character.  The run is bounded by `e`: without the bound
       * the scan could read past the buffer and leave `token->next` beyond
       * `token->end`, after which `r == e` could never become true.
       */
      while (q < e && (cl = grn_isspace((const char *)q, token->encoding))) {
        q += cl;
      }
      token->next = (unsigned char *)q;
      break;
    }
  }

  GRN_TEXT_SET_REF(&token->curr_, p, r - p);
  GRN_UINT32_SET(ctx, &token->stat_, r == e ? GRN_TOKEN_LAST : 0);
  grn_ctx_push(ctx, &token->curr_);
  grn_ctx_push(ctx, &token->stat_);
  return GRN_SUCCESS;
}
/*
 * @overload [](id)
 *   @param id [Integer, Groonga::Record] The record ID for the
 *     column value.
 *
 *   @return [Object] The value for the record ID. `nil` is returned
 *     when this object has no underlying column cache.
 */
static VALUE
rb_grn_column_cache_array_reference (VALUE self, VALUE rb_id)
{
    RbGrnColumnCache *cache;
    grn_ctx *context;
    grn_obj *buffer;
    grn_id id;
    void *raw_value;
    size_t raw_size = 0;

    TypedData_Get_Struct(self, RbGrnColumnCache, &data_type, cache);

    if (!cache->column_cache) {
        return Qnil;
    }

    context = cache->context;
    buffer = &(cache->buffer);

    id = rb_grn_id_from_ruby_object(rb_id, context, cache->table, self);
    raw_value = grn_column_cache_ref(context, cache->column_cache,
                                     id, &raw_size);
    /* Surface any error recorded on the context by the lookup. */
    rb_grn_context_check(context, self);

    /* The buffer only references the cached bytes; it does not copy them. */
    GRN_TEXT_SET_REF(buffer, raw_value, raw_size);

    return GRNBULK2RVAL(context, buffer, cache->range, self);
}
/*
 * This function returns tokens one by one.
 *
 * Scanning starts at `token->next` and stops at the first whitespace
 * character, at a byte sequence `grn_charlen_()` cannot measure, or at
 * `token->end`.  The scanned bytes are pushed onto the context stack as the
 * token text, followed by a status value that is GRN_TOKEN_LAST when the
 * scan reached `token->end` and 0 otherwise.
 *
 * `nargs` and `args` are not used.  Always returns NULL.
 */
static grn_obj *
mecab_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  size_t cl;
  /* grn_obj *table = args[0]; */
  grn_mecab_tokenizer *token = user_data->ptr;
  char *p = token->next, *r;
  char *e = token->end;

  for (r = p; r < e; r += cl) {
    if (!(cl = grn_charlen_(ctx, r, e, token->encoding))) {
      /* Unmeasurable character: give up on the rest of the input. */
      token->next = e;
      break;
    }
    if (grn_isspace(r, token->encoding)) {
      char *q = r;
      /*
       * Skip the whole run of whitespace so the next token starts on a
       * non-space character.  The run is bounded by `e`: without the bound
       * the scan could read past the buffer and leave `token->next` beyond
       * `token->end`, after which `r == e` could never become true.
       */
      while (q < e && (cl = grn_isspace(q, token->encoding))) {
        q += cl;
      }
      token->next = q;
      break;
    }
  }

  GRN_TEXT_SET_REF(&token->curr_, p, r - p);
  GRN_UINT32_SET(ctx, &token->stat_, r == e ? GRN_TOKEN_LAST : 0);
  grn_ctx_push(ctx, &token->curr_);
  grn_ctx_push(ctx, &token->stat_);
  return NULL;
}
/*
 * Read/write test body run once per worker.
 *
 * `data` carries the worker index `i` encoded as a pointer.  The worker:
 *   1. initializes its own context (`contexts[i]`),
 *   2. opens the table at the path given by GRN_TEST_ENV_TABLE_PATH,
 *   3. checks that key `i` is absent, adds it and stores a text value,
 *   4. reads the value back and compares it with what was stored,
 *   5. closes the table and finalizes the context.
 *
 * The process number (GRN_TEST_ENV_PROCESS_NUMBER, defaulting to 0) is only
 * used to label assertion messages and the stored value.
 */
void
test_read_write(gconstpointer *data)
{
  gint i;
  int added;
  grn_ctx *context;
  grn_obj *table;
  const gchar *path;
  const gchar *value_string;
  gint process_number = 0;
  const gchar *process_number_string;
  const gchar table_name[] = "performance-read-write";
  grn_obj value;
  grn_obj *retrieved_value;
  grn_id id;
  grn_rc rc;

  i = GPOINTER_TO_INT(data);
  process_number_string = g_getenv(GRN_TEST_ENV_PROCESS_NUMBER);
  if (process_number_string) {
    process_number = atoi(process_number_string);
  }

  rc = grn_ctx_init(&contexts[i], GRN_CTX_USE_QL);
  grn_test_assert(rc, cut_set_message("context: %d (%d)", i, process_number));
  context = &contexts[i];

  path = g_getenv(GRN_TEST_ENV_TABLE_PATH);
  cut_assert_not_null(path);
  tables[i] = grn_table_open(context, table_name, strlen(table_name), path);
  cut_assert_not_null(tables[i],
                      cut_message("table: %d (%d)", i, process_number));
  table = tables[i];

  /* The key must not exist before this worker adds it. */
  grn_test_assert_nil(grn_table_get(context, table, &i, sizeof(grn_id)),
                      cut_message("lookup - fail: (%d:%d)", i, process_number));

  value_string = cut_take_printf("value: (%d:%d)", i, process_number);
  id = grn_table_add(context, table, &i, sizeof(grn_id), &added);
  grn_test_assert_not_nil(id);
  cut_assert_equal_int(1, added);

  GRN_TEXT_INIT(&value, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_TEXT_SET_REF(&value, value_string, strlen(value_string));
  /* Fail here rather than at the comparison below if the write is refused. */
  grn_test_assert(grn_obj_set_value(context, table, id, &value, GRN_OBJ_SET));

  retrieved_value = grn_obj_get_value(context, table, id, NULL);
  /*
   * Check the lookup result itself: it is dereferenced right below, and
   * `id` has already been verified after grn_table_add().
   */
  cut_assert_not_null(retrieved_value,
                      cut_message("lookup - success: (%d:%d)",
                                  i, process_number));

  /* NUL-terminate the retrieved bytes so they can be compared as a string. */
  GRN_TEXT_PUTC(context, retrieved_value, '\0');
  cut_assert_equal_string(value_string, GRN_BULK_HEAD(retrieved_value));

  tables[i] = NULL;
  grn_test_assert(grn_obj_close(context, table));

  grn_test_assert(grn_ctx_fin(context));
}
/*
 * Emits the next fixed-size element of a uvector as a token.
 *
 * Each call pushes two objects onto the context stack: the token text
 * (`token->unit` bytes starting at the current position, or zero bytes when
 * fewer than `token->unit` bytes remain before `token->tail`) and a status
 * value.  The status is GRN_TOKEN_LAST when the emitted element ends exactly
 * at `token->tail` or when nothing could be emitted, and 0 otherwise.
 *
 * `nargs` and `args` are not used.  Always returns NULL.
 */
static grn_obj *
uvector_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_uvector_tokenizer_info *token = user_data->ptr;
  byte *start = token->curr;
  byte *following = start + token->unit;
  size_t length;
  unsigned int status;

  if (following > token->tail) {
    /* Not enough bytes left for a full element. */
    length = 0;
    status = GRN_TOKEN_LAST;
  } else {
    length = token->unit;
    status = (following == token->tail) ? GRN_TOKEN_LAST : 0;
    token->curr = following;
  }

  GRN_TEXT_SET_REF(&token->curr_, start, length);
  GRN_UINT32_SET(ctx, &token->stat_, status);

  grn_ctx_push(ctx, &token->curr_);
  grn_ctx_push(ctx, &token->stat_);
  return NULL;
}
/*
 * Publishes one token to the context stack.
 *
 * `token->str` is set to reference (not copy) the `str_length` bytes at
 * `str_ptr`, `token->status` is set to `status`, and both objects are then
 * pushed onto `ctx` in that order.
 */
void
grn_tokenizer_token_push(grn_ctx *ctx, grn_tokenizer_token *token,
                         const char *str_ptr, unsigned int str_length,
                         grn_token_status status)
{
  grn_obj *text = &token->str;
  grn_obj *flags = &token->status;

  GRN_TEXT_SET_REF(text, str_ptr, str_length);
  GRN_UINT32_SET(ctx, flags, status);

  grn_ctx_push(ctx, text);
  grn_ctx_push(ctx, flags);
}
/*
 * Searches `index` for the `string_len` bytes at `string`.
 *
 * A new hash table keyed by `index->keys` is created to hold the result and
 * the search is run against `index->inv` with GRN_SEL_OR.
 *
 * Returns the result table, or NULL when the table could not be created or
 * the search reported an error (the table is closed in that case).
 */
grn_obj *
grn_index_sel(grn_ctx *ctx, grn_index *index,
              const char *string, unsigned int string_len)
{
  grn_obj query;
  grn_obj *hits;
  grn_rc rc;

  /* The query only references the caller's buffer. */
  GRN_TEXT_INIT(&query, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_TEXT_SET_REF(&query, string, string_len);

  hits = grn_table_create(ctx, NULL, 0, NULL,
                          GRN_OBJ_TABLE_HASH_KEY, index->keys, 0);
  if (!hits) {
    return NULL;
  }

  rc = grn_obj_search(ctx, index->inv, &query, hits, GRN_SEL_OR, NULL);
  if (rc) {
    grn_obj_close(ctx, hits);
    return NULL;
  }

  return hits;
}
/*
 * Replaces the indexed text of the record identified by `key`.
 *
 * `key` is a NUL-terminated string; it is added to `index->keys` (the NULL
 * `added` argument means the caller is not told whether it was new).  When a
 * record ID is obtained, `index->inv` is updated for section 1 from
 * `oldvalue` (`oldvalue_len` bytes) to `newvalue` (`newvalue_len` bytes).
 *
 * Returns the context's current return code (`ctx->rc`).
 */
grn_rc
grn_index_upd(grn_ctx *ctx, grn_index *index, const char *key,
              const char *oldvalue, unsigned int oldvalue_len,
              const char *newvalue, unsigned int newvalue_len)
{
  grn_obj previous;
  grn_obj current;
  grn_id rid;

  rid = grn_table_add(ctx, index->keys, key, strlen(key), NULL);
  if (!rid) {
    return ctx->rc;
  }

  /* Both values only reference the caller's buffers. */
  GRN_TEXT_INIT(&previous, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_TEXT_INIT(&current, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_TEXT_SET_REF(&previous, oldvalue, oldvalue_len);
  GRN_TEXT_SET_REF(&current, newvalue, newvalue_len);

  grn_column_index_update(ctx, index->inv, rid, 1, &previous, &current);

  grn_obj_close(ctx, &previous);
  grn_obj_close(ctx, &current);

  return ctx->rc;
}
/*
 * Allocates and initializes a token cursor over `str` (`str_len` bytes) for
 * the given `table` and `mode`.
 *
 * The encoding and tokenizer are taken from `table`.  When the table has a
 * tokenizer, its PROC_INIT function is invoked with the source text pushed
 * onto the context stack.
 *
 * Returns the new cursor, or NULL when the table information cannot be
 * obtained, allocation fails, or `ctx->rc` is non-zero after initialization
 * (the cursor is freed in that case).
 */
grn_token *
grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len,
               grn_token_mode mode)
{
  grn_token *token;
  grn_encoding encoding;
  grn_obj *tokenizer;

  if (grn_table_get_info(ctx, table, NULL, &encoding, &tokenizer)) {
    return NULL;
  }

  token = GRN_MALLOC(sizeof(grn_token));
  if (!token) {
    return NULL;
  }

  /* Where the text comes from and how it is to be tokenized. */
  token->table = table;
  token->mode = mode;
  token->encoding = encoding;
  token->tokenizer = tokenizer;
  token->orig = str;
  token->orig_blen = str_len;

  /* Cursor state before the first token is fetched. */
  token->curr = NULL;
  token->curr_size = 0;
  token->pos = -1;
  token->status = grn_token_doing;
  token->force_prefix = 0;

  if (tokenizer) {
    grn_proc *init_proc = (grn_proc *)tokenizer;
    grn_obj source;

    /* The source text is handed to the tokenizer by reference. */
    GRN_TEXT_INIT(&source, GRN_OBJ_DO_SHALLOW_COPY);
    GRN_TEXT_SET_REF(&source, str, str_len);

    token->pctx.caller = NULL;
    token->pctx.user_data.ptr = NULL;
    token->pctx.proc = init_proc;
    token->pctx.hooks = NULL;
    token->pctx.currh = NULL;
    token->pctx.phase = PROC_INIT;

    grn_ctx_push(ctx, &source);
    init_proc->funcs[PROC_INIT](ctx, 1, &table, &token->pctx.user_data);
    grn_obj_close(ctx, &source);
  }

  if (ctx->rc) {
    GRN_FREE(token);
    return NULL;
  }

  return token;
}
/*
 * Emits the next delimiter-separated token.
 *
 * Scanning starts at `token->next` and advances one character at a time
 * until the delimiter is found, a byte sequence `grn_charlen_()` cannot
 * measure is hit, or `token->end` is reached.  The scanned bytes are pushed
 * onto the context stack as the token text, followed by a status value that
 * is GRN_TOKEN_LAST when the scan reached `token->end` and 0 otherwise.
 *
 * `nargs` and `args` are not used.  Always returns NULL.
 */
static grn_obj *
delimited_next(grn_ctx *ctx, int nargs, grn_obj **args,
               grn_user_data *user_data)
{
  grn_delimited_tokenizer *token = user_data->ptr;
  const unsigned char *begin = token->next;
  const unsigned char *limit = token->end;
  const unsigned char *cursor = begin;
  size_t char_len;

  while (cursor < limit) {
    char_len = grn_charlen_(ctx, (char *)cursor, (char *)limit,
                            token->encoding);
    if (char_len == 0) {
      /* Unmeasurable character: give up on the rest of the input. */
      token->next = (unsigned char *)limit;
      break;
    }
    if (cursor + token->delimiter_len <= limit &&
        memcmp(cursor, token->delimiter, token->delimiter_len) == 0) {
      /* The next token starts right after the delimiter. */
      token->next = cursor + token->delimiter_len;
      break;
    }
    cursor += char_len;
  }

  GRN_TEXT_SET_REF(&token->curr_, begin, cursor - begin);
  GRN_UINT32_SET(ctx, &token->stat_, cursor == limit ? GRN_TOKEN_LAST : 0);
  grn_ctx_push(ctx, &token->curr_);
  grn_ctx_push(ctx, &token->stat_);
  return NULL;
}
/*
 * Emits the next N-gram token.
 *
 * The token starts at `token->next`; its character position is the previous
 * position plus `token->skip`.  When character types are available
 * (`token->ctypes`) and the matching `uni_alpha` / `uni_digit` /
 * `uni_symbol` option is set, a run of same-typed characters is emitted as a
 * single token.  Otherwise up to `token->ngram_unit` characters are emitted.
 *
 * Two objects are pushed onto the context stack: the token text and a status
 * bit set built from GRN_TOKEN_OVERLAP, GRN_TOKEN_UNMATURED, GRN_TOKEN_LAST
 * and GRN_TOKEN_REACH_END.
 *
 * `nargs` and `args` are not used.  Returns NULL.
 */
static grn_obj *
ngram_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  size_t cl;
  grn_ngram_tokenizer *token = user_data->ptr;
  /* p: token start, r: scan cursor (becomes token end), e: end of input. */
  const unsigned char *p = token->next, *r = p, *e = token->end;
  /* len counts characters in this token; pos is its character position. */
  int32_t len = 0, pos = token->pos + token->skip, status = 0;
  /* Character-type entry for the first character, or NULL if unavailable. */
  uint_least8_t *cp = token->ctypes ? token->ctypes + pos : NULL;
  if (cp && token->uni_alpha && GRN_STR_CTYPE(*cp) == grn_str_alpha) {
    /* Group a run of alphabetic characters; a blank mark always ends it. */
    while ((cl = grn_charlen_(ctx, (char *)r, (char *)e, token->encoding))) {
      len++;
      r += cl;
      if (/* !token->ignore_blank && */ GRN_STR_ISBLANK(*cp)) { break; }
      if (GRN_STR_CTYPE(*++cp) != grn_str_alpha) { break; }
    }
    token->next = r;
    token->overlap = 0;
  } else if (cp && token->uni_digit && GRN_STR_CTYPE(*cp) == grn_str_digit) {
    /* Group a run of digits; a blank mark always ends it. */
    while ((cl = grn_charlen_(ctx, (char *)r, (char *)e, token->encoding))) {
      len++;
      r += cl;
      if (/* !token->ignore_blank && */ GRN_STR_ISBLANK(*cp)) { break; }
      if (GRN_STR_CTYPE(*++cp) != grn_str_digit) { break; }
    }
    token->next = r;
    token->overlap = 0;
  } else if (cp && token->uni_symbol && GRN_STR_CTYPE(*cp) == grn_str_symbol) {
    /*
     * Group a run of symbols.  Unlike the alpha/digit cases, a blank mark
     * ends the run only when `ignore_blank` is off.
     */
    while ((cl = grn_charlen_(ctx, (char *)r, (char *)e, token->encoding))) {
      len++;
      r += cl;
      if (!token->ignore_blank && GRN_STR_ISBLANK(*cp)) { break; }
      if (GRN_STR_CTYPE(*++cp) != grn_str_symbol) { break; }
    }
    token->next = r;
    token->overlap = 0;
  } else {
#ifdef PRE_DEFINED_UNSPLIT_WORDS
    /*
     * NOTE(review): this section uses `tid` and `sym`, which are not
     * declared in this function, and a `continue` outside any loop.  It is
     * compiled only when PRE_DEFINED_UNSPLIT_WORDS is defined -- confirm
     * whether it is still meant to build.
     */
    const unsigned char *key = NULL;
    // todo : grn_pat_lcp_search
    if ((tid = grn_sym_common_prefix_search(sym, p))) {
      if (!(key = _grn_sym_key(sym, tid))) {
        token->status = grn_token_not_found;
        return NULL;
      }
      len = grn_str_len(key, token->encoding, NULL);
    }
    r = p + grn_charlen_(ctx, p, e, token->encoding);
    if (tid && (len > 1 || r == p)) {
      if (r != p && pos + len - 1 <= token->tail) { continue; }
      p += strlen(key);
      if (!*p && !token->add) {
        token->status = grn_token_done;
      }
    }
#endif /* PRE_DEFINED_UNSPLIT_WORDS */
    /* Plain N-gram: take the first character unconditionally... */
    if ((cl = grn_charlen_(ctx, (char *)r, (char *)e, token->encoding))) {
      len++;
      r += cl;
      /* ...and restart the following token right after that character. */
      token->next = r;
      /* Extend up to ngram_unit characters. */
      while (len < token->ngram_unit &&
             (cl = grn_charlen_(ctx, (char *)r, (char *)e, token->encoding))) {
        if (cp) {
          if (!token->ignore_blank && GRN_STR_ISBLANK(*cp)) { break; }
          cp++;
          /* Stop before a character that starts a grouped run. */
          if ((token->uni_alpha && GRN_STR_CTYPE(*cp) == grn_str_alpha) ||
              (token->uni_digit && GRN_STR_CTYPE(*cp) == grn_str_digit) ||
              (token->uni_symbol && GRN_STR_CTYPE(*cp) == grn_str_symbol)) {
            break;
          }
        }
        len++;
        r += cl;
      }
      /* `token->overlap` here is still the value left by the previous call. */
      if (token->overlap) { status |= GRN_TOKEN_OVERLAP; }
      if (len < token->ngram_unit) { status |= GRN_TOKEN_UNMATURED; }
      token->overlap = (len > 1) ? 1 : 0;
    }
  }
  /* Record the position, length and last character index of this token. */
  token->pos = pos;
  token->len = len;
  token->tail = pos + len - 1;
  if (p == r || token->next == e) {
    /* Empty token, or the next start is already at the end of input. */
    token->skip = 0;
    status |= GRN_TOKEN_LAST;
  } else {
    /* Overlapping tokens advance by one character, others by `len`. */
    token->skip = token->overlap ? 1 : len;
  }
  if (r == e) { status |= GRN_TOKEN_REACH_END; }
  GRN_TEXT_SET_REF(&token->curr_, p, r - p);
  GRN_UINT32_SET(ctx, &token->stat_, status);
  grn_ctx_push(ctx, &token->curr_);
  grn_ctx_push(ctx, &token->stat_);
  return NULL;
}
/*
 * Allocates and initializes a token cursor over `str` (`str_len` bytes) for
 * the given `table`, `mode` and tokenizer `flags`.
 *
 * The encoding, tokenizer and normalizer are taken from `table`.
 *   - With a tokenizer: its PROC_INIT function is invoked with the source
 *     text and `flags` pushed onto the context stack, in that order.
 *   - Without a tokenizer: the text is opened as a string with the table's
 *     normalizer, and the cursor points at the normalized bytes.
 *
 * Returns the new cursor, or NULL when the table information cannot be
 * obtained, allocation fails, or `ctx->rc` is non-zero after initialization
 * (the cursor is released with grn_token_close() in that case).
 */
grn_token *
grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len,
               grn_token_mode mode, unsigned int flags)
{
  grn_token *token;
  grn_encoding encoding;
  grn_obj *tokenizer;
  grn_obj *normalizer;
  grn_obj_flags table_flags;

  if (grn_table_get_info(ctx, table, &table_flags, &encoding,
                         &tokenizer, &normalizer)) {
    return NULL;
  }

  token = GRN_MALLOC(sizeof(grn_token));
  if (!token) {
    return NULL;
  }

  /* Where the text comes from and how it is to be tokenized. */
  token->table = table;
  token->mode = mode;
  token->encoding = encoding;
  token->tokenizer = tokenizer;
  token->orig = str;
  token->orig_blen = str_len;

  /* Cursor state before the first token is fetched. */
  token->curr = NULL;
  token->nstr = NULL;
  token->curr_size = 0;
  token->pos = -1;
  token->status = GRN_TOKEN_DOING;
  token->force_prefix = 0;

  if (tokenizer) {
    grn_proc *init_proc = (grn_proc *)tokenizer;
    grn_obj source;
    grn_obj flag_value;

    /* The source text is handed to the tokenizer by reference. */
    GRN_TEXT_INIT(&source, GRN_OBJ_DO_SHALLOW_COPY);
    GRN_TEXT_SET_REF(&source, str, str_len);
    GRN_UINT32_INIT(&flag_value, 0);
    GRN_UINT32_SET(ctx, &flag_value, flags);

    token->pctx.caller = NULL;
    token->pctx.user_data.ptr = NULL;
    token->pctx.proc = init_proc;
    token->pctx.hooks = NULL;
    token->pctx.currh = NULL;
    token->pctx.phase = PROC_INIT;

    grn_ctx_push(ctx, &source);
    grn_ctx_push(ctx, &flag_value);
    init_proc->funcs[PROC_INIT](ctx, 1, &table, &token->pctx.user_data);

    grn_obj_close(ctx, &flag_value);
    grn_obj_close(ctx, &source);
  } else {
    int nflags = 0;

    token->nstr = grn_string_open_(ctx, str, str_len,
                                   normalizer, nflags, token->encoding);
    if (!token->nstr) {
      ERR(GRN_TOKENIZER_ERROR, "grn_string_open failed at grn_token_open");
    } else {
      const char *normalized;

      grn_string_get_normalized(ctx, token->nstr,
                                &normalized, &(token->curr_size), NULL);
      token->curr = (const unsigned char *)normalized;
    }
  }

  if (ctx->rc) {
    grn_token_close(ctx, token);
    return NULL;
  }

  return token;
}
/*
 * Command procedure: installs the "tag_synonym" procedure as a GRN_HOOK_SET
 * hook on the column named by the "table" and "column" command variables.
 *
 * Before the hook is added the function checks that the table, the column,
 * the column's range object, the synonym column
 * (SYNONYM_COLUMN_NAME) of that range, and the "tag_synonym" procedure can
 * all be resolved.  Any failure is logged at GRN_LOG_ERROR and the command
 * ends without output.
 *
 * On success the number of GRN_HOOK_SET hooks now attached to the column is
 * written to the command output.
 *
 * `nargs` and `args` are not used.  Always returns NULL.
 */
static grn_obj *
command_tag_synonym_add(grn_ctx *ctx, GNUC_UNUSED int nargs,
                        GNUC_UNUSED grn_obj **args, grn_user_data *user_data)
{
  grn_obj *var, *proc, *table, *column;
  unsigned int nhooks = 0;

  char *table_name = NULL;
  unsigned int table_len = 0;
  char *column_name = NULL;
  unsigned int column_len = 0;

  var = grn_plugin_proc_get_var(ctx, user_data, "table", -1);
  if (GRN_TEXT_LEN(var) != 0) {
    table_name = GRN_TEXT_VALUE(var);
    table_len = GRN_TEXT_LEN(var);
  }
  var = grn_plugin_proc_get_var(ctx, user_data, "column", -1);
  if (GRN_TEXT_LEN(var) != 0) {
    column_name = GRN_TEXT_VALUE(var);
    column_len = GRN_TEXT_LEN(var);
  }

  /*
   * Resolve the hook target.  Both lookups return NULL for a missing or
   * unknown name, and the results are dereferenced by the calls below, so
   * bail out early instead of passing NULL on.
   */
  table = grn_ctx_get(ctx, table_name, table_len);
  if (!table) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                   "[tag-synonym] "
                   "table doesn't exist: <%.*s>",
                   (int)table_len, table_name ? table_name : "");
    return NULL;
  }
  column = grn_obj_column(ctx, table, column_name, column_len);
  if (!column) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                   "[tag-synonym] "
                   "column doesn't exist: <%.*s>",
                   (int)column_len, column_name ? column_name : "");
    return NULL;
  }

  /* The column's range must provide the synonym column. */
  {
    grn_obj *range;
    grn_obj *col;
    grn_id range_id;

    range_id = grn_obj_get_range(ctx, column);
    range = grn_ctx_at(ctx, range_id);
    if (!range) {
      GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                     "[tag-synonym] "
                     "hooked column must be reference type");
      return NULL;
    }
    col = grn_obj_column(ctx, range,
                         SYNONYM_COLUMN_NAME,
                         SYNONYM_COLUMN_NAME_LEN);
    if (!col) {
      GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                     "[tag-synonym] "
                     "couldn't open synonym column");
      return NULL;
    }
  }

  proc = grn_ctx_get(ctx, "tag_synonym", -1);
  if (!proc) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                   "[tag-synonym] "
                   "couldn't find tag_synonym procedure");
    return NULL;
  }

  {
    grn_obj data;
    default_set_value_hook_data hook_data = { grn_obj_id(ctx, proc), 0 };
    /* The hook payload is passed by reference to the local struct. */
    GRN_TEXT_INIT(&data, GRN_OBJ_DO_SHALLOW_COPY);
    GRN_TEXT_SET_REF(&data, &hook_data, sizeof(hook_data));
    grn_obj_add_hook(ctx, column, GRN_HOOK_SET, 0, proc, &data);
    grn_obj_unlink(ctx, &data);
  }

  nhooks = grn_obj_get_nhooks(ctx, column, GRN_HOOK_SET);
  grn_ctx_output_int32(ctx, nhooks);

  return NULL;
}