/*
 * Command handler that evaluates a script with mruby.
 *
 * The script is taken from the first command variable and must have the
 * ShortText, Text or LongText domain.  Any other domain is reported as
 * GRN_INVALID_ARGUMENT together with the inspected value.  On success the
 * evaluation result is passed to output_result().
 *
 * This handler always returns NULL.
 */
static grn_obj *
command_ruby_eval(grn_ctx *ctx, int nargs, grn_obj **args,
                  grn_user_data *user_data)
{
  grn_obj *script = VAR(0);
  grn_bool is_text;
  mrb_value result;

  is_text = (script->header.domain == GRN_DB_SHORT_TEXT ||
             script->header.domain == GRN_DB_TEXT ||
             script->header.domain == GRN_DB_LONG_TEXT);
  if (!is_text) {
    /* Show what was actually passed so that the caller can fix it. */
    grn_obj inspected;
    GRN_TEXT_INIT(&inspected, 0);
    grn_inspect(ctx, &inspected, script);
    ERR(GRN_INVALID_ARGUMENT,
        "script must be a string: <%.*s>",
        (int)GRN_TEXT_LEN(&inspected),
        GRN_TEXT_VALUE(&inspected));
    GRN_OBJ_FIN(ctx, &inspected);
    return NULL;
  }

  result = grn_mrb_eval(ctx, GRN_TEXT_VALUE(script), GRN_TEXT_LEN(script));
  output_result(ctx, result);

  return NULL;
}
/*
 * Converts a loaded "_id" value into a record ID.
 *
 * Values recognized by the two fast paths are returned as is.  Everything
 * else is cast to UInt32; when the cast fails GRN_INVALID_ARGUMENT is
 * reported with the inspected value and GRN_ID_NIL is returned.
 *
 * NOTE(review): the fast paths compare header.type with GRN_DB_UINT32 and
 * GRN_DB_INT32.  Elsewhere in this file GRN_DB_* constants are compared with
 * header.domain, so header.domain may have been intended here -- confirm
 * before changing, because it alters which values skip the cast.
 */
static grn_id
parse_id_value(grn_ctx *ctx, grn_obj *value)
{
  grn_id parsed_id = GRN_ID_NIL;
  grn_obj as_uint32;

  if (value->header.type == GRN_DB_UINT32) {
    return GRN_UINT32_VALUE(value);
  }
  if (value->header.type == GRN_DB_INT32) {
    return GRN_INT32_VALUE(value);
  }

  GRN_UINT32_INIT(&as_uint32, 0);
  if (grn_obj_cast(ctx, value, &as_uint32, GRN_FALSE) == GRN_SUCCESS) {
    parsed_id = GRN_UINT32_VALUE(&as_uint32);
  } else {
    grn_obj inspected;
    GRN_TEXT_INIT(&inspected, 0);
    grn_inspect(ctx, &inspected, value);
    ERR(GRN_INVALID_ARGUMENT,
        "<%s>: failed to cast to <UInt32>: <%.*s>",
        GRN_COLUMN_NAME_ID,
        (int)GRN_TEXT_LEN(&inspected),
        GRN_TEXT_VALUE(&inspected));
    GRN_OBJ_FIN(ctx, &inspected);
  }
  GRN_OBJ_FIN(ctx, &as_uint32);

  return parsed_id;
}
/*
 * Appends a textual representation of `vector` to `buffer`.
 *
 * The output is a bracketed, comma separated list with one
 * {"value":<inspected element>, "weight":<weight>} entry per section.
 * Always returns GRN_SUCCESS.
 */
static grn_rc
grn_vector_inspect(grn_ctx *ctx, grn_obj *buffer, grn_obj *vector)
{
  grn_obj *body = vector->u.v.body;
  int nth;

  GRN_TEXT_PUTS(ctx, buffer, "[");
  for (nth = 0; nth < vector->u.v.n_sections; nth++) {
    grn_section *section = &(vector->u.v.sections[nth]);
    const char *element_raw = GRN_BULK_HEAD(body) + section->offset;
    grn_obj element;

    if (nth != 0) {
      GRN_TEXT_PUTS(ctx, buffer, ", ");
    }

    GRN_TEXT_PUTS(ctx, buffer, "{");
    GRN_TEXT_PUTS(ctx, buffer, "\"value\":");
    /* Wrap the raw section bytes in a temporary bulk that carries the
       section's domain so that grn_inspect() can format the element. */
    GRN_OBJ_INIT(&element, GRN_BULK, GRN_OBJ_DO_SHALLOW_COPY, section->domain);
    grn_bulk_write(ctx, &element, element_raw, section->length);
    grn_inspect(ctx, buffer, &element);
    GRN_OBJ_FIN(ctx, &element);

    GRN_TEXT_PUTS(ctx, buffer, ", \"weight\":");
    grn_text_itoa(ctx, buffer, section->weight);
    GRN_TEXT_PUTS(ctx, buffer, "}");
  }
  GRN_TEXT_PUTS(ctx, buffer, "]");

  return GRN_SUCCESS;
}
/*
 * Cutter assertion helper: passes when `expected` and `actual` are the same
 * ID.  Otherwise both IDs are resolved with grn_ctx_at(), inspected, and
 * registered as the expected/actual values before the test is failed with
 * the stringified expressions and the context address.
 */
void
grn_test_assert_equal_id_helper(grn_ctx *context,
                                grn_id expected,
                                grn_id actual,
                                const gchar *expression_context,
                                const gchar *expression_expected,
                                const gchar *expression_actual)
{
  grn_obj *object_for_expected;
  grn_obj *object_for_actual;
  grn_obj expected_text;
  grn_obj actual_text;

  if (expected == actual) {
    cut_test_pass();
    return;
  }

  object_for_expected = grn_ctx_at(context, expected);
  object_for_actual = grn_ctx_at(context, actual);

  GRN_TEXT_INIT(&expected_text, 0);
  GRN_TEXT_INIT(&actual_text, 0);
  grn_inspect(context, &expected_text, object_for_expected);
  grn_inspect(context, &actual_text, object_for_actual);

  cut_set_expected(cut_take_printf("%d: <%.*s>",
                                   expected,
                                   (int)GRN_TEXT_LEN(&expected_text),
                                   GRN_TEXT_VALUE(&expected_text)));
  cut_set_actual(cut_take_printf("%d: <%.*s>",
                                 actual,
                                 (int)GRN_TEXT_LEN(&actual_text),
                                 GRN_TEXT_VALUE(&actual_text)));

  /* Release everything before failing the test. */
  grn_obj_unlink(context, &expected_text);
  grn_obj_unlink(context, &actual_text);
  grn_obj_unlink(context, object_for_expected);
  grn_obj_unlink(context, object_for_actual);

  cut_test_fail(cut_take_printf("<%s> == <%s> (%s)\n"
                                " context: <%p>",
                                expression_expected,
                                expression_actual,
                                expression_context,
                                context));
}
/*
 * mruby method: returns the grn_inspect() text of the object wrapped by
 * `self` as a new mruby string.  The grn_ctx is taken from mrb->ud.
 */
static mrb_value
object_grn_inspect(mrb_state *mrb, mrb_value self)
{
  grn_ctx *ctx = (grn_ctx *)mrb->ud;
  grn_obj text;
  mrb_value mrb_text;

  GRN_TEXT_INIT(&text, 0);
  grn_inspect(ctx, &text, DATA_PTR(self));
  /* Copy into an mruby string before the temporary buffer is released. */
  mrb_text = mrb_str_new(mrb, GRN_TEXT_VALUE(&text), GRN_TEXT_LEN(&text));
  GRN_OBJ_FIN(ctx, &text);

  return mrb_text;
}
/*
 * vector_size(target): returns the number of elements in a vector.
 *
 * Exactly one argument is required and it must be a GRN_VECTOR, GRN_PVECTOR
 * or GRN_UVECTOR.  On success a UInt32 value allocated with
 * grn_plugin_proc_alloc() is returned.  On a wrong argument count, a
 * non-vector target or an allocation failure NULL is returned; the first two
 * cases report GRN_INVALID_ARGUMENT.
 */
static grn_obj *
func_vector_size(grn_ctx *ctx, int n_args, grn_obj **args,
                 grn_user_data *user_data)
{
  grn_obj *target;
  unsigned int size;
  grn_obj *grn_size;

  if (n_args != 1) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "vector_size(): wrong number of arguments (%d for 1)",
                     n_args);
    return NULL;
  }

  target = args[0];
  switch (target->header.type) {
  case GRN_VECTOR :
  case GRN_PVECTOR :
  case GRN_UVECTOR :
    size = grn_vector_size(ctx, target);
    break;
  default :
    {
      grn_obj inspected;

      GRN_TEXT_INIT(&inspected, 0);
      /* The output buffer comes first and the inspected object second.
         They used to be swapped, which wrote into the caller's target and
         left the error message empty. */
      grn_inspect(ctx, &inspected, target);
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "vector_size(): target object must be vector: <%.*s>",
                       (int)GRN_TEXT_LEN(&inspected),
                       GRN_TEXT_VALUE(&inspected));
      GRN_OBJ_FIN(ctx, &inspected);
      return NULL;
    }
    break;
  }

  grn_size = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_UINT32, 0);
  if (!grn_size) {
    return NULL;
  }

  GRN_UINT32_SET(ctx, grn_size, size);

  return grn_size;
}
/*
 * Builds the Ruby inspect string of an expression.
 *
 * The string is the common object header, followed by the grn_inspect()
 * output of the expression with its leading "#<expr" prefix and its last two
 * bytes removed, followed by the common object footer.
 */
static VALUE
rb_grn_expression_inspect (VALUE self)
{
  grn_ctx *context = NULL;
  grn_obj *expression;
  grn_obj buffer;
  VALUE rb_result;
  size_t skipped_length;
  const char *body;
  size_t body_length;

  rb_grn_expression_deconstruct(SELF(self), &expression, &context,
                                NULL, NULL, NULL, NULL, NULL);

  rb_result = rb_str_new_cstr("");
  rb_grn_object_inspect_header(self, rb_result);

  GRN_TEXT_INIT(&buffer, 0);
  grn_inspect(context, &buffer, expression);
  /* Drop the last two bytes of the inspected text. */
  grn_bulk_truncate(context, &buffer, GRN_TEXT_LEN(&buffer) - 2);

  /* Skip the leading prefix; the header above replaces it. */
  skipped_length = strlen("#<expr");
  body = GRN_TEXT_VALUE(&buffer) + skipped_length;
  body_length = GRN_TEXT_LEN(&buffer) - skipped_length;
  rb_str_concat(rb_result,
                rb_grn_context_rb_string_new(context, body, body_length));

  GRN_OBJ_FIN(context, &buffer);

  rb_grn_object_inspect_footer(self, rb_result);

  return rb_result;
}
/*
 * Creates an anonymous patricia trie table keyed by ShortText that is used
 * to hold highlight keywords.
 *
 * When a normalizer name is given (length > 0), it is looked up and set as
 * the table's normalizer.  If the looked up object is not a normalizer proc,
 * GRN_INVALID_ARGUMENT is reported, the table is released and NULL is
 * returned.
 */
static grn_obj *
func_highlight_create_keywords_table(grn_ctx *ctx,
                                     grn_user_data *user_data,
                                     const char *normalizer_name,
                                     unsigned int normalizer_name_length)
{
  grn_obj *table;
  grn_obj *normalizer_proc;

  table = grn_table_create(ctx, NULL, 0, NULL,
                           GRN_OBJ_TABLE_PAT_KEY,
                           grn_ctx_at(ctx, GRN_DB_SHORT_TEXT),
                           NULL);

  /* No normalizer requested: the plain table is all we need. */
  if (normalizer_name_length == 0) {
    return table;
  }

  normalizer_proc = grn_ctx_get(ctx, normalizer_name, normalizer_name_length);
  if (grn_obj_is_normalizer_proc(ctx, normalizer_proc)) {
    grn_obj_set_info(ctx, table, GRN_INFO_NORMALIZER, normalizer_proc);
    grn_obj_unlink(ctx, normalizer_proc);
    return table;
  }

  {
    grn_obj inspected;
    GRN_TEXT_INIT(&inspected, 0);
    grn_inspect(ctx, &inspected, normalizer_proc);
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "highlight_full() not normalizer: <%.*s>",
                     (int)GRN_TEXT_LEN(&inspected),
                     GRN_TEXT_VALUE(&inspected));
    GRN_OBJ_FIN(ctx, &inspected);
  }
  grn_obj_unlink(ctx, normalizer_proc);
  grn_obj_unlink(ctx, table);

  return NULL;
}
/*
 * Cutter assertion helper: passes when `expected` and `actual` are the same
 * record ID.  Otherwise the keys of both records in `table` are registered
 * as the expected/actual values and the test is failed with a message that
 * includes the stringified expressions, the context address and the
 * inspected table.
 */
void
grn_test_assert_equal_record_id_helper(grn_ctx *context,
                                       grn_obj *table,
                                       grn_id expected,
                                       grn_id actual,
                                       const gchar *expression_context,
                                       const gchar *expression_table,
                                       const gchar *expression_expected,
                                       const gchar *expression_actual)
{
  gchar key_buffer[GRN_TABLE_MAX_KEY_SIZE];
  int key_length;
  grn_obj table_text;
  const gchar *failure_message;

  if (expected == actual) {
    cut_test_pass();
    return;
  }

  /* The same key buffer is reused for both records. */
  key_length = grn_table_get_key(context, table, expected,
                                 key_buffer, sizeof(key_buffer));
  cut_set_expected(cut_take_printf("%d: <%.*s>",
                                   expected, key_length, key_buffer));

  key_length = grn_table_get_key(context, table, actual,
                                 key_buffer, sizeof(key_buffer));
  cut_set_actual(cut_take_printf("%d: <%.*s>",
                                 actual, key_length, key_buffer));

  GRN_TEXT_INIT(&table_text, 0);
  grn_inspect(context, &table_text, table);
  failure_message =
    cut_take_printf("<%s> == <%s> (context: <%s>, table: <%s>)\n"
                    " context: <%p>\n"
                    " table: <%.*s>",
                    expression_expected,
                    expression_actual,
                    expression_context,
                    expression_table,
                    context,
                    (int)GRN_TEXT_LEN(&table_text),
                    GRN_TEXT_VALUE(&table_text));
  /* The message was built before this point, so the buffer can go. */
  grn_obj_unlink(context, &table_text);

  cut_test_fail(failure_message);
}
static grn_obj * func_time_classify_raw(grn_ctx *ctx, int n_args, grn_obj **args, grn_user_data *user_data, const char *function_name, grn_time_classify_unit unit) { grn_obj *time; uint32_t interval_raw = 1; grn_obj *classed_time; grn_bool accept_interval = GRN_TRUE; switch (unit) { case GRN_TIME_CLASSIFY_UNIT_SECOND : case GRN_TIME_CLASSIFY_UNIT_MINUTE : case GRN_TIME_CLASSIFY_UNIT_HOUR : accept_interval = GRN_TRUE; break; case GRN_TIME_CLASSIFY_UNIT_DAY : case GRN_TIME_CLASSIFY_UNIT_WEEK : accept_interval = GRN_FALSE; break; case GRN_TIME_CLASSIFY_UNIT_MONTH : case GRN_TIME_CLASSIFY_UNIT_YEAR : accept_interval = GRN_TRUE; break; } if (accept_interval) { if (!(n_args == 1 || n_args == 2)) { GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "%s(): " "wrong number of arguments (%d for 1..2)", function_name, n_args); return NULL; } } else { if (n_args != 1) { GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "%s(): " "wrong number of arguments (%d for 1)", function_name, n_args); return NULL; } } time = args[0]; if (!(time->header.type == GRN_BULK && time->header.domain == GRN_DB_TIME)) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, time); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "%s(): " "the first argument must be a time: " "<%.*s>", function_name, (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } if (n_args == 2) { grn_obj *interval; grn_obj casted_interval; interval = args[1]; if (!(interval->header.type == GRN_BULK && grn_type_id_is_number_family(ctx, interval->header.domain))) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, interval); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "%s(): " "the second argument must be a number: " "<%.*s>", function_name, (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } GRN_VALUE_FIX_SIZE_INIT(&casted_interval, 0, GRN_DB_UINT32); grn_obj_cast(ctx, interval, 
&casted_interval, GRN_FALSE); interval_raw = GRN_UINT32_VALUE(&casted_interval); GRN_OBJ_FIN(ctx, &casted_interval); } { int64_t time_raw; struct tm tm; int64_t classed_time_raw; time_raw = GRN_TIME_VALUE(time); if (!grn_time_to_tm(ctx, time_raw, &tm)) { return NULL; } switch (unit) { case GRN_TIME_CLASSIFY_UNIT_SECOND : tm.tm_sec = (tm.tm_sec / interval_raw) * interval_raw; break; case GRN_TIME_CLASSIFY_UNIT_MINUTE : tm.tm_min = (tm.tm_min / interval_raw) * interval_raw; tm.tm_sec = 0; break; case GRN_TIME_CLASSIFY_UNIT_HOUR : tm.tm_hour = (tm.tm_hour / interval_raw) * interval_raw; tm.tm_min = 0; tm.tm_sec = 0; break; case GRN_TIME_CLASSIFY_UNIT_DAY : tm.tm_hour = 0; tm.tm_min = 0; tm.tm_sec = 0; break; case GRN_TIME_CLASSIFY_UNIT_WEEK : if ((tm.tm_mday - tm.tm_wday) >= 0) { tm.tm_mday -= tm.tm_wday; } else { int n_underflowed_mday = -(tm.tm_mday - tm.tm_wday); int mday; int max_mday = 31; if (tm.tm_mon == 0) { tm.tm_year--; tm.tm_mon = 11; } else { tm.tm_mon--; } for (mday = max_mday; mday > n_underflowed_mday; mday--) { int64_t unused; tm.tm_mday = mday; if (grn_time_from_tm(ctx, &unused, &tm)) { break; } } tm.tm_mday -= n_underflowed_mday; } tm.tm_hour = 0; tm.tm_min = 0; tm.tm_sec = 0; break; case GRN_TIME_CLASSIFY_UNIT_MONTH : tm.tm_mon = (tm.tm_mon / interval_raw) * interval_raw; tm.tm_mday = 1; tm.tm_hour = 0; tm.tm_min = 0; tm.tm_sec = 0; break; case GRN_TIME_CLASSIFY_UNIT_YEAR : tm.tm_year = (((1900 + tm.tm_year) / interval_raw) * interval_raw) - 1900; tm.tm_mon = 0; tm.tm_mday = 1; tm.tm_hour = 0; tm.tm_min = 0; tm.tm_sec = 0; break; } if (!grn_time_from_tm(ctx, &classed_time_raw, &tm)) { return NULL; } classed_time = grn_plugin_proc_alloc(ctx, user_data, time->header.domain, 0); if (!classed_time) { return NULL; } GRN_TIME_SET(ctx, classed_time, classed_time_raw); return classed_time; } }
/* TODO: support caching for the same parameter. */
/*
 * snippet(text, keyword..., [options]): extracts snippets from a text.
 *
 * args[0] is the target text.  When the last argument is a hash table it is
 * treated as an options object and excluded from the keyword arguments.
 * Recognized option names are "width", "max_n_results",
 * "skip_leading_spaces", "html_escape", "prefix", "suffix", "normalizer",
 * "default_open_tag" and "default_close_tag"; any other name is reported as
 * GRN_INVALID_ARGUMENT.
 *
 * When neither default tag has a non-zero length, the remaining arguments
 * are consumed as (keyword, open tag, close tag) triples.  Otherwise every
 * remaining argument is a keyword and the default tags passed to
 * grn_snip_open() are used.
 *
 * Returns the result of snippet_exec(), or a GRN_DB_VOID value allocated by
 * grn_plugin_proc_alloc() when no snippets were produced (including every
 * error path and the case of nargs <= 1).
 *
 * NOTE(review): option values are read with GRN_UINT32_VALUE,
 * GRN_BOOL_VALUE and GRN_TEXT_VALUE without checking their types -- confirm
 * callers guarantee them.
 * NOTE(review): `snip` is not closed in this function, including on the
 * "not normalizer" error path -- confirm who owns it.
 */
static grn_obj *
func_snippet(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *snippets = NULL;

#define N_REQUIRED_ARGS 1
#define KEYWORD_SET_SIZE 3
  if (nargs > N_REQUIRED_ARGS) {
    grn_obj *text = args[0];
    grn_obj *end_arg = args[nargs - 1];
    grn_obj *snip = NULL;
    unsigned int width = 200;
    unsigned int max_n_results = 3;
    grn_snip_mapping *mapping = NULL;
    int flags = GRN_SNIP_SKIP_LEADING_SPACES;
    const char *prefix = NULL;
    int prefix_length = 0;
    const char *suffix = NULL;
    int suffix_length = 0;
    const char *normalizer_name = NULL;
    int normalizer_name_length = 0;
    const char *default_open_tag = NULL;
    int default_open_tag_length = 0;
    const char *default_close_tag = NULL;
    int default_close_tag_length = 0;
    int n_args_without_option = nargs;

    /* A trailing hash table is the options object, not a keyword. */
    if (end_arg->header.type == GRN_TABLE_HASH_KEY) {
      grn_obj *options = end_arg;
      grn_hash_cursor *cursor;
      void *key;
      int key_size;
      grn_obj *value;

      n_args_without_option--;
      cursor = grn_hash_cursor_open(ctx, (grn_hash *)options,
                                    NULL, 0, NULL, 0,
                                    0, -1, 0);
      if (!cursor) {
        GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                         "snippet(): couldn't open cursor");
        goto exit;
      }
      while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) {
        grn_hash_cursor_get_key_value(ctx, cursor, &key, &key_size,
                                      (void **)&value);
        /* Option names are matched by exact length and bytes. */
        if (key_size == 5 && !memcmp(key, "width", 5)) {
          width = GRN_UINT32_VALUE(value);
        } else if (key_size == 13 && !memcmp(key, "max_n_results", 13)) {
          max_n_results = GRN_UINT32_VALUE(value);
        } else if (key_size == 19 && !memcmp(key, "skip_leading_spaces", 19)) {
          /* The flag is on by default; only an explicit false clears it. */
          if (GRN_BOOL_VALUE(value) == GRN_FALSE) {
            flags &= ~GRN_SNIP_SKIP_LEADING_SPACES;
          }
        } else if (key_size == 11 && !memcmp(key, "html_escape", 11)) {
          if (GRN_BOOL_VALUE(value)) {
            mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;
          }
        } else if (key_size == 6 && !memcmp(key, "prefix", 6)) {
          prefix = GRN_TEXT_VALUE(value);
          prefix_length = GRN_TEXT_LEN(value);
        } else if (key_size == 6 && !memcmp(key, "suffix", 6)) {
          suffix = GRN_TEXT_VALUE(value);
          suffix_length = GRN_TEXT_LEN(value);
        } else if (key_size == 10 && !memcmp(key, "normalizer", 10)) {
          normalizer_name = GRN_TEXT_VALUE(value);
          normalizer_name_length = GRN_TEXT_LEN(value);
        } else if (key_size == 16 && !memcmp(key, "default_open_tag", 16)) {
          default_open_tag = GRN_TEXT_VALUE(value);
          default_open_tag_length = GRN_TEXT_LEN(value);
        } else if (key_size == 17 && !memcmp(key, "default_close_tag", 17)) {
          default_close_tag = GRN_TEXT_VALUE(value);
          default_close_tag_length = GRN_TEXT_LEN(value);
        } else {
          GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                           "invalid option name: <%.*s>",
                           key_size, (char *)key);
          grn_hash_cursor_close(ctx, cursor);
          goto exit;
        }
      }
      grn_hash_cursor_close(ctx, cursor);
    }

    snip = grn_snip_open(ctx, flags, width, max_n_results,
                         default_open_tag, default_open_tag_length,
                         default_close_tag, default_close_tag_length, mapping);
    if (snip) {
      /* NOTE(review): rc is assigned by grn_snip_add_cond() below but never
         inspected. */
      grn_rc rc;
      unsigned int i;
      /* No "normalizer" option: use GRN_NORMALIZER_AUTO.  An option with an
         empty value leaves the snip's normalizer untouched. */
      if (!normalizer_name) {
        grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO);
      } else if (normalizer_name_length > 0) {
        grn_obj *normalizer;
        normalizer = grn_ctx_get(ctx, normalizer_name, normalizer_name_length);
        if (!grn_obj_is_normalizer_proc(ctx, normalizer)) {
          grn_obj inspected;
          GRN_TEXT_INIT(&inspected, 0);
          grn_inspect(ctx, &inspected, normalizer);
          GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                           "snippet(): not normalizer: <%.*s>",
                           (int)GRN_TEXT_LEN(&inspected),
                           GRN_TEXT_VALUE(&inspected));
          GRN_OBJ_FIN(ctx, &inspected);
          grn_obj_unlink(ctx, normalizer);
          goto exit;
        }
        grn_snip_set_normalizer(ctx, snip, normalizer);
        grn_obj_unlink(ctx, normalizer);
      }
      if (default_open_tag_length == 0 && default_close_tag_length == 0) {
        /* No default tags: arguments come in (keyword, open tag, close tag)
           triples.  Trailing arguments that do not complete a triple are
           ignored by the integer division. */
        unsigned int n_keyword_sets =
          (n_args_without_option - N_REQUIRED_ARGS) / KEYWORD_SET_SIZE;
        grn_obj **keyword_set_args = args + N_REQUIRED_ARGS;
        for (i = 0; i < n_keyword_sets; i++) {
          rc = grn_snip_add_cond(ctx, snip,
                                 GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE]),
                                 GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE]),
                                 GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE + 1]),
                                 GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE + 1]),
                                 GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE + 2]),
                                 GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE + 2]));
        }
      } else {
        /* Default tags given: every remaining argument is a keyword. */
        unsigned int n_keywords = n_args_without_option - N_REQUIRED_ARGS;
        grn_obj **keyword_args = args + N_REQUIRED_ARGS;
        for (i = 0; i < n_keywords; i++) {
          rc = grn_snip_add_cond(ctx, snip,
                                 GRN_TEXT_VALUE(keyword_args[i]),
                                 GRN_TEXT_LEN(keyword_args[i]),
                                 NULL, 0,
                                 NULL, 0);
        }
      }
      snippets = snippet_exec(ctx, snip, text, user_data,
                              prefix, prefix_length,
                              suffix, suffix_length);
    }
  }
#undef KEYWORD_SET_SIZE
#undef N_REQUIRED_ARGS

exit :
  /* Always hand back a value: fall back to a void value when nothing was
     produced. */
  if (!snippets) {
    snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return snippets;
}
static grn_rc selector_fuzzy_search(grn_ctx *ctx, grn_obj *table, grn_obj *index, int nargs, grn_obj **args, grn_obj *res, grn_operator op) { grn_rc rc = GRN_SUCCESS; grn_obj *target = NULL; grn_obj *obj; grn_obj *query; uint32_t max_distance = 1; uint32_t prefix_length = 0; uint32_t prefix_match_size = 0; uint32_t max_expansion = 0; int flags = 0; grn_bool use_sequential_search = GRN_FALSE; if ((nargs - 1) < 2) { GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "fuzzy_search(): wrong number of arguments (%d ...)", nargs - 1); rc = ctx->rc; goto exit; } obj = args[1]; query = args[2]; if (nargs == 4) { grn_obj *options = args[3]; grn_hash_cursor *cursor; void *key; grn_obj *value; int key_size; if (options->header.type != GRN_TABLE_HASH_KEY) { GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "fuzzy_search(): " "3rd argument must be object literal: <%.*s>", (int)GRN_TEXT_LEN(options), GRN_TEXT_VALUE(options)); goto exit; } cursor = grn_hash_cursor_open(ctx, (grn_hash *)options, NULL, 0, NULL, 0, 0, -1, 0); if (!cursor) { GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE, "fuzzy_search(): couldn't open cursor"); goto exit; } while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) { grn_hash_cursor_get_key_value(ctx, cursor, &key, &key_size, (void **)&value); if (key_size == 12 && !memcmp(key, "max_distance", 12)) { max_distance = GRN_UINT32_VALUE(value); } else if (key_size == 13 && !memcmp(key, "prefix_length", 13)) { prefix_length = GRN_UINT32_VALUE(value); } else if (key_size == 13 && !memcmp(key, "max_expansion", 13)) { max_expansion = GRN_UINT32_VALUE(value); } else if (key_size == 18 && !memcmp(key, "with_transposition", 18)) { if (GRN_BOOL_VALUE(value)) { flags |= GRN_TABLE_FUZZY_SEARCH_WITH_TRANSPOSITION; } } else { GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "invalid option name: <%.*s>", key_size, (char *)key); grn_hash_cursor_close(ctx, cursor); goto exit; } } grn_hash_cursor_close(ctx, cursor); } if (index) { target = index; } else { if (obj->header.type == 
GRN_COLUMN_INDEX) { target = obj; } else { grn_column_index(ctx, obj, GRN_OP_FUZZY, &target, 1, NULL); } } if (target) { grn_obj *lexicon; use_sequential_search = GRN_TRUE; lexicon = grn_ctx_at(ctx, target->header.domain); if (lexicon) { if (lexicon->header.type == GRN_TABLE_PAT_KEY) { use_sequential_search = GRN_FALSE; } grn_obj_unlink(ctx, lexicon); } } else { if (grn_obj_is_key_accessor(ctx, obj) && table->header.type == GRN_TABLE_PAT_KEY) { target = table; } else { use_sequential_search = GRN_TRUE; } } if (prefix_length) { const char *s = GRN_TEXT_VALUE(query); const char *e = GRN_BULK_CURR(query); const char *p; unsigned int cl = 0; unsigned int length = 0; for (p = s; p < e && (cl = grn_charlen(ctx, p, e)); p += cl) { length++; if (length > prefix_length) { break; } } prefix_match_size = p - s; } if (use_sequential_search) { rc = sequential_fuzzy_search(ctx, table, obj, query, max_distance, prefix_match_size, max_expansion, flags, res, op); goto exit; } if (!target) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, target); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "fuzzy_search(): " "column must be COLUMN_INDEX or TABLE_PAT_KEY: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); rc = ctx->rc; GRN_OBJ_FIN(ctx, &inspected); } else { grn_search_optarg options = {0}; options.mode = GRN_OP_FUZZY; options.fuzzy.prefix_match_size = prefix_match_size; options.fuzzy.max_distance = max_distance; options.fuzzy.max_expansion = max_expansion; options.fuzzy.flags = flags; grn_obj_search(ctx, target, query, res, op, &options); } exit : return rc; }
/*
 * Handles the end of a JSON object ("}") while loading.
 *
 * The values collected since the matching "{" are (name, value) pairs.  The
 * opening marker is updated with the number of collected values.  When the
 * object is at the emit level it is loaded as one record:
 *
 *   1. The pairs are scanned for "_id" / "_key" (at most one of them).
 *   2. The target record is looked up or added.
 *   3. Every other pair is stored into the column with that name.
 *   4. loader->each, if set, is executed for the record.
 *
 * On exit the per-record statistics and optional outputs (IDs, return codes,
 * error messages) are updated, the collected values are discarded and the
 * context error is cleared.
 */
static void
brace_close(grn_ctx *ctx, grn_loader *loader)
{
  grn_id id = GRN_ID_NIL;
  grn_obj *value, *value_begin, *value_end;
  grn_obj *id_value = NULL, *key_value = NULL;
  uint32_t begin;

  GRN_UINT32_POP(&loader->level, begin);
  value_begin = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + begin;
  value_end = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + loader->values_size;
  /* Check the opening marker.  `value` is not assigned yet at this point,
     so the assertion must look at value_begin. */
  GRN_ASSERT(value_begin->header.domain == GRN_JSON_LOAD_OPEN_BRACE);
  GRN_UINT32_SET(ctx, value_begin, loader->values_size - begin - 1);
  value_begin++;
  if (GRN_BULK_VSIZE(&loader->level) > sizeof(uint32_t) * loader->emit_level) {
    /* Nested object: it is consumed later as part of an outer value. */
    return;
  }
  if (!loader->table) {
    goto exit;
  }

  /* Scan values to find _id or _key. */
  for (value = value_begin; value + 1 < value_end;
       value = values_next(ctx, value)) {
    const char *name = GRN_TEXT_VALUE(value);
    unsigned int name_size = GRN_TEXT_LEN(value);
    if (value->header.domain != GRN_DB_TEXT) {
      grn_obj buffer;
      GRN_TEXT_INIT(&buffer, 0);
      grn_inspect(ctx, &buffer, value);
      GRN_LOG(ctx, GRN_LOG_ERROR,
              "column name must be string: <%.*s>",
              (int)GRN_TEXT_LEN(&buffer), GRN_TEXT_VALUE(&buffer));
      GRN_OBJ_FIN(ctx, &buffer);
      goto exit;
    }
    /* Step from the name to its value. */
    value++;
    if (name_equal(name, name_size, GRN_COLUMN_NAME_ID)) {
      if (id_value || key_value) {
        if (loader->table->header.type == GRN_TABLE_NO_KEY) {
          GRN_LOG(ctx, GRN_LOG_ERROR, "duplicated '_id' column");
          goto exit;
        } else {
          GRN_LOG(ctx, GRN_LOG_ERROR,
                  "duplicated key columns: %s and %s",
                  id_value ? GRN_COLUMN_NAME_ID : GRN_COLUMN_NAME_KEY,
                  GRN_COLUMN_NAME_ID);
          goto exit;
        }
      }
      id_value = value;
    } else if (name_equal(name, name_size, GRN_COLUMN_NAME_KEY)) {
      if (id_value || key_value) {
        GRN_LOG(ctx, GRN_LOG_ERROR,
                "duplicated key columns: %s and %s",
                id_value ? GRN_COLUMN_NAME_ID : GRN_COLUMN_NAME_KEY,
                GRN_COLUMN_NAME_KEY);
        goto exit;
      }
      key_value = value;
    }
  }

  switch (loader->table->header.type) {
  case GRN_TABLE_HASH_KEY :
  case GRN_TABLE_PAT_KEY :
  case GRN_TABLE_DAT_KEY :
    /* The target table requires _id or _key. */
    if (!id_value && !key_value) {
      GRN_LOG(ctx, GRN_LOG_ERROR, "neither _key nor _id is assigned");
      goto exit;
    }
    break;
  default :
    /* The target table does not have _key. */
    if (key_value) {
      GRN_LOG(ctx, GRN_LOG_ERROR, "nonexistent key value");
      goto exit;
    }
    break;
  }

  if (id_value) {
    id = parse_id_value(ctx, id_value);
    if (grn_table_at(ctx, loader->table, id) == GRN_ID_NIL) {
      /* Add a record only when the ID was parsed without an error. */
      if (ctx->rc == GRN_SUCCESS) {
        id = grn_table_add(ctx, loader->table, NULL, 0, NULL);
      }
    }
  } else if (key_value) {
    id = loader_add(ctx, key_value);
  } else {
    id = grn_table_add(ctx, loader->table, NULL, 0, NULL);
  }
  if (id == GRN_ID_NIL) {
    /* Target record is not available. */
    goto exit;
  }

  for (value = value_begin; value + 1 < value_end;
       value = values_next(ctx, value)) {
    grn_obj *col;
    const char *name = GRN_TEXT_VALUE(value);
    unsigned int name_size = GRN_TEXT_LEN(value);
    value++;
    if (value == id_value || value == key_value) {
      /* Skip _id and _key, because it's already used to get id. */
      continue;
    }
    col = grn_obj_column(ctx, loader->table, name, name_size);
    if (!col) {
      /* Automatic column creation is intentionally disabled: an unknown
         column is only logged and its value is skipped. */
      GRN_LOG(ctx, GRN_LOG_ERROR, "invalid column('%.*s')",
              (int)name_size, name);
    } else {
      if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET) {
        set_vector(ctx, col, id, value);
      } else if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACE) {
        set_weight_vector(ctx, col, id, value);
      } else {
        grn_obj_set_value(ctx, col, id, value, GRN_OBJ_SET);
      }
      if (ctx->rc != GRN_SUCCESS) {
        /* A failing column does not abort the record: remember the error,
           report it, count it and continue with the next column. */
        grn_loader_save_error(ctx, loader);
        report_set_column_value_failure(ctx, key_value,
                                        name, name_size, value);
        loader->n_column_errors++;
        ERRCLR(ctx);
      }
      grn_obj_unlink(ctx, col);
    }
  }
  if (loader->each) {
    value = grn_expr_get_var_by_offset(ctx, loader->each, 0);
    GRN_RECORD_SET(ctx, value, id);
    grn_expr_exec(ctx, loader->each, 0);
  }
  loader->nrecords++;
exit:
  if (ctx->rc != GRN_SUCCESS) {
    loader->n_record_errors++;
  }
  if (loader->output_ids) {
    GRN_UINT32_PUT(ctx, &(loader->ids), id);
  }
  if (loader->output_errors) {
    GRN_INT32_PUT(ctx, &(loader->return_codes), ctx->rc);
    grn_vector_add_element(ctx,
                           &(loader->error_messages),
                           ctx->errbuf,
                           strlen(ctx->errbuf),
                           0,
                           GRN_DB_TEXT);
  }
  /* Discard the values of this object and start clean for the next one. */
  loader->values_size = begin;
  ERRCLR(ctx);
}
/*
 * Handles the end of a JSON array ("]") while loading.
 *
 * The opening marker is updated with the number of collected values.  When
 * the array is at the emit level it is handled in one of two ways:
 *
 *   - Columns are not decided yet (GRN_LOADER_COLUMNS_UNSET): the values are
 *     column names.  "_id" / "_key" positions are remembered in
 *     loader->id_offset / loader->key_offset and the other columns are
 *     appended to loader->columns.  Any problem marks the columns as broken.
 *   - Columns are decided: the values form one record.  The record is looked
 *     up or added and each value is stored into its column.
 *
 * Per-record statistics and optional outputs (IDs, return codes, error
 * messages) are updated only for record arrays.  In all cases the collected
 * values are discarded and the context error is cleared on exit.
 */
static void
bracket_close(grn_ctx *ctx, grn_loader *loader)
{
  grn_id id = GRN_ID_NIL;
  grn_obj *value, *value_end, *id_value = NULL, *key_value = NULL;
  grn_obj *col, **cols; /* Columns except _id and _key. */
  uint32_t i, begin;
  uint32_t ncols;   /* Number of columns except _id and _key. */
  uint32_t nvalues; /* Number of values in brackets. */
  uint32_t depth;
  grn_bool is_record_load = GRN_FALSE;

  cols = (grn_obj **)GRN_BULK_HEAD(&loader->columns);
  ncols = GRN_BULK_VSIZE(&loader->columns) / sizeof(grn_obj *);
  GRN_UINT32_POP(&loader->level, begin);
  value = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + begin;
  value_end = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + loader->values_size;
  GRN_ASSERT(value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET);
  GRN_UINT32_SET(ctx, value, loader->values_size - begin - 1);
  value++;
  depth = GRN_BULK_VSIZE(&loader->level);
  if (depth > sizeof(uint32_t) * loader->emit_level) {
    /* Nested array: it is consumed later as part of an outer value. */
    return;
  }
  if (depth == 0 || !loader->table ||
      loader->columns_status == GRN_LOADER_COLUMNS_BROKEN) {
    goto exit;
  }
  nvalues = values_len(ctx, value, value_end);

  if (loader->columns_status == GRN_LOADER_COLUMNS_UNSET) {
    /*
     * Target columns and _id or _key are not specified yet and values are
     * handled as column names and "_id" or "_key".
     */
    for (i = 0; i < nvalues; i++) {
      const char *col_name;
      unsigned int col_name_size;
      if (value->header.domain != GRN_DB_TEXT) {
        grn_obj buffer;
        GRN_TEXT_INIT(&buffer, 0);
        grn_inspect(ctx, &buffer, value);
        ERR(GRN_INVALID_ARGUMENT,
            "column name must be string: <%.*s>",
            (int)GRN_TEXT_LEN(&buffer), GRN_TEXT_VALUE(&buffer));
        grn_loader_save_error(ctx, loader);
        GRN_OBJ_FIN(ctx, &buffer);
        loader->columns_status = GRN_LOADER_COLUMNS_BROKEN;
        goto exit;
      }
      col_name = GRN_TEXT_VALUE(value);
      col_name_size = GRN_TEXT_LEN(value);
      col = grn_obj_column(ctx, loader->table, col_name, col_name_size);
      if (!col) {
        ERR(GRN_INVALID_ARGUMENT, "nonexistent column: <%.*s>",
            col_name_size, col_name);
        grn_loader_save_error(ctx, loader);
        loader->columns_status = GRN_LOADER_COLUMNS_BROKEN;
        goto exit;
      }
      if (name_equal(col_name, col_name_size, GRN_COLUMN_NAME_ID)) {
        grn_obj_unlink(ctx, col);
        if (loader->id_offset != -1 || loader->key_offset != -1) {
          /* _id and _key must not appear more than once. */
          if (loader->id_offset != -1) {
            ERR(GRN_INVALID_ARGUMENT,
                "duplicated id and key columns: <%s> at %d and <%s> at %d",
                GRN_COLUMN_NAME_ID, i,
                GRN_COLUMN_NAME_ID, loader->id_offset);
          } else {
            ERR(GRN_INVALID_ARGUMENT,
                "duplicated id and key columns: <%s> at %d and <%s> at %d",
                GRN_COLUMN_NAME_ID, i,
                GRN_COLUMN_NAME_KEY, loader->key_offset);
          }
          grn_loader_save_error(ctx, loader);
          loader->columns_status = GRN_LOADER_COLUMNS_BROKEN;
          goto exit;
        }
        loader->id_offset = i;
      } else if (name_equal(col_name, col_name_size, GRN_COLUMN_NAME_KEY)) {
        grn_obj_unlink(ctx, col);
        if (loader->id_offset != -1 || loader->key_offset != -1) {
          /* _id and _key must not appear more than once. */
          if (loader->id_offset != -1) {
            ERR(GRN_INVALID_ARGUMENT,
                "duplicated id and key columns: <%s> at %d and <%s> at %d",
                GRN_COLUMN_NAME_KEY, i,
                GRN_COLUMN_NAME_ID, loader->id_offset);
          } else {
            ERR(GRN_INVALID_ARGUMENT,
                "duplicated id and key columns: <%s> at %d and <%s> at %d",
                GRN_COLUMN_NAME_KEY, i,
                GRN_COLUMN_NAME_KEY, loader->key_offset);
          }
          grn_loader_save_error(ctx, loader);
          loader->columns_status = GRN_LOADER_COLUMNS_BROKEN;
          goto exit;
        }
        loader->key_offset = i;
      } else {
        GRN_PTR_PUT(ctx, &loader->columns, col);
      }
      value++;
    }
    switch (loader->table->header.type) {
    case GRN_TABLE_HASH_KEY :
    case GRN_TABLE_PAT_KEY :
    case GRN_TABLE_DAT_KEY :
      /* These table types need _id or _key to identify a record. */
      if (loader->id_offset == -1 && loader->key_offset == -1) {
        ERR(GRN_INVALID_ARGUMENT, "missing id or key column");
        grn_loader_save_error(ctx, loader);
        loader->columns_status = GRN_LOADER_COLUMNS_BROKEN;
        goto exit;
      }
      break;
    }
    loader->columns_status = GRN_LOADER_COLUMNS_SET;
    goto exit;
  }

  is_record_load = GRN_TRUE;

  /* Target columns and _id or _key are already specified. */
  if (!nvalues) {
    /*
     * Accept empty arrays because a dump command may output a load command
     * which contains empty arrays for a table with deleted records.
     */
    id = grn_table_add(ctx, loader->table, NULL, 0, NULL);
  } else {
    uint32_t expected_nvalues = ncols;
    if (loader->id_offset != -1 || loader->key_offset != -1) {
      expected_nvalues++;
    }
    if (nvalues != expected_nvalues) {
      ERR(GRN_INVALID_ARGUMENT,
          "unexpected #values: expected:%u, actual:%u",
          expected_nvalues, nvalues);
      grn_loader_save_error(ctx, loader);
      goto exit;
    }
    if (loader->id_offset != -1) {
      id_value = value + loader->id_offset;
      id = parse_id_value(ctx, id_value);
      if (grn_table_at(ctx, loader->table, id) == GRN_ID_NIL) {
        if (ctx->rc == GRN_SUCCESS) {
          id = grn_table_add(ctx, loader->table, NULL, 0, NULL);
        } else {
          /* The _id value could not be parsed.  Do not add a record for
             it; keep the error so that the record is reported as failed,
             as the object-style loader does. */
          grn_loader_save_error(ctx, loader);
        }
      }
    } else if (loader->key_offset != -1) {
      key_value = value + loader->key_offset;
      id = loader_add(ctx, key_value);
    } else {
      id = grn_table_add(ctx, loader->table, NULL, 0, NULL);
    }
  }
  if (id == GRN_ID_NIL) {
    /* Target record is not available. */
    goto exit;
  }

  for (i = 0; i < nvalues; i++, value = values_next(ctx, value)) {
    if (i == loader->id_offset || i == loader->key_offset) {
      /* Skip _id and _key, because it's already used to get id. */
      continue;
    }
    col = *cols;
    if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET) {
      set_vector(ctx, col, id, value);
    } else if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACE) {
      set_weight_vector(ctx, col, id, value);
    } else {
      grn_obj_set_value(ctx, col, id, value, GRN_OBJ_SET);
    }
    if (ctx->rc != GRN_SUCCESS) {
      /* A failing column does not abort the record: remember the error,
         report it, count it and continue with the next column. */
      char column_name[GRN_TABLE_MAX_KEY_SIZE];
      unsigned int column_name_size;
      grn_loader_save_error(ctx, loader);
      column_name_size = grn_obj_name(ctx, col, column_name,
                                      GRN_TABLE_MAX_KEY_SIZE);
      report_set_column_value_failure(ctx, key_value,
                                      column_name, column_name_size,
                                      value);
      loader->n_column_errors++;
      ERRCLR(ctx);
    }
    cols++;
  }
  if (loader->each) {
    grn_obj *v = grn_expr_get_var_by_offset(ctx, loader->each, 0);
    GRN_RECORD_SET(ctx, v, id);
    grn_expr_exec(ctx, loader->each, 0);
  }
  loader->nrecords++;
exit:
  if (is_record_load) {
    if (ctx->rc != GRN_SUCCESS) {
      loader->n_record_errors++;
    }
    if (loader->output_ids) {
      GRN_UINT32_PUT(ctx, &(loader->ids), id);
    }
    if (loader->output_errors) {
      GRN_INT32_PUT(ctx, &(loader->return_codes), ctx->rc);
      grn_vector_add_element(ctx,
                             &(loader->error_messages),
                             ctx->errbuf,
                             strlen(ctx->errbuf),
                             0,
                             GRN_DB_TEXT);
    }
  }
  /* Discard the values of this array and start clean for the next one. */
  loader->values_size = begin;
  ERRCLR(ctx);
}
/*
 * Creates the lexicon table used by the tokenize commands.
 *
 * tokenizer_name:     text bulk holding the name of the tokenizer to use.
 *                     It must resolve to a tokenizer proc.
 * normalizer_name:    text bulk holding the name of the normalizer to use.
 *                     An empty bulk means "no normalizer".
 * token_filter_names: passed as is to grn_proc_table_set_token_filters().
 *
 * Returns an unnamed (no name, no path) hash-key table whose key type is
 * GRN_DB_SHORT_TEXT with the tokenizer, the optional normalizer and the
 * token filters set on it. Returns NULL when the tokenizer or the
 * normalizer can't be resolved (a plugin error is reported in that case)
 * or when the table can't be created.
 *
 * The tokenizer and normalizer references obtained here are unlinked on
 * every path after a successful lookup, both on success and on failure.
 */
static grn_obj *
create_lexicon_for_tokenize(grn_ctx *ctx,
                            grn_obj *tokenizer_name,
                            grn_obj *normalizer_name,
                            grn_obj *token_filter_names)
{
  grn_obj *lexicon;
  grn_obj *tokenizer;
  grn_obj *normalizer = NULL;

  tokenizer = grn_ctx_get(ctx,
                          GRN_TEXT_VALUE(tokenizer_name),
                          GRN_TEXT_LEN(tokenizer_name));
  if (!tokenizer) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "[tokenize] nonexistent tokenizer: <%.*s>",
                     (int)GRN_TEXT_LEN(tokenizer_name),
                     GRN_TEXT_VALUE(tokenizer_name));
    return NULL;
  }

  if (!grn_obj_is_tokenizer_proc(ctx, tokenizer)) {
    grn_obj inspected;
    GRN_TEXT_INIT(&inspected, 0);
    grn_inspect(ctx, &inspected, tokenizer);
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "[tokenize] not tokenizer: %.*s",
                     (int)GRN_TEXT_LEN(&inspected),
                     GRN_TEXT_VALUE(&inspected));
    GRN_OBJ_FIN(ctx, &inspected);
    grn_obj_unlink(ctx, tokenizer);
    return NULL;
  }

  if (GRN_TEXT_LEN(normalizer_name) > 0) {
    normalizer = grn_ctx_get(ctx,
                             GRN_TEXT_VALUE(normalizer_name),
                             GRN_TEXT_LEN(normalizer_name));
    if (!normalizer) {
      grn_obj_unlink(ctx, tokenizer);
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "[tokenize] nonexistent normalizer: <%.*s>",
                       (int)GRN_TEXT_LEN(normalizer_name),
                       GRN_TEXT_VALUE(normalizer_name));
      return NULL;
    }

    if (!grn_obj_is_normalizer_proc(ctx, normalizer)) {
      grn_obj inspected;
      grn_obj_unlink(ctx, tokenizer);
      GRN_TEXT_INIT(&inspected, 0);
      grn_inspect(ctx, &inspected, normalizer);
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "[tokenize] not normalizer: %.*s",
                       (int)GRN_TEXT_LEN(&inspected),
                       GRN_TEXT_VALUE(&inspected));
      GRN_OBJ_FIN(ctx, &inspected);
      grn_obj_unlink(ctx, normalizer);
      return NULL;
    }
  }

  lexicon = grn_table_create(ctx, NULL, 0,
                             NULL,
                             GRN_OBJ_TABLE_HASH_KEY,
                             grn_ctx_at(ctx, GRN_DB_SHORT_TEXT),
                             NULL);
  if (!lexicon) {
    /*
     * Don't configure a table that doesn't exist. Only release what was
     * acquired above; the error state of ctx is left untouched so that
     * the caller sees whatever grn_table_create() reported.
     */
    grn_obj_unlink(ctx, tokenizer);
    if (normalizer) {
      grn_obj_unlink(ctx, normalizer);
    }
    return NULL;
  }

  grn_obj_set_info(ctx, lexicon, GRN_INFO_DEFAULT_TOKENIZER, tokenizer);
  grn_obj_unlink(ctx, tokenizer);

  if (normalizer) {
    grn_obj_set_info(ctx, lexicon, GRN_INFO_NORMALIZER, normalizer);
    grn_obj_unlink(ctx, normalizer);
  }

  grn_proc_table_set_token_filters(ctx, lexicon, token_filter_names);

  return lexicon;
}
static grn_obj * func_vector_slice(grn_ctx *ctx, int n_args, grn_obj **args, grn_user_data *user_data) { grn_obj *target; grn_obj *from_raw = NULL; grn_obj *length_raw = NULL; int64_t from = 0; int64_t length = -1; uint32_t to = 0; uint32_t size = 0; grn_obj *slice; if (n_args < 2 || n_args > 3) { GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "vector_slice(): wrong number of arguments (%d for 2..3)", n_args); return NULL; } target = args[0]; from_raw = args[1]; if (n_args == 3) { length_raw = args[2]; } switch (target->header.type) { case GRN_VECTOR : case GRN_PVECTOR : case GRN_UVECTOR : size = grn_vector_size(ctx, target); break; default : { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, target, &inspected); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "vector_slice(): target object must be vector: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } break; } if (!grn_type_id_is_number_family(ctx, from_raw->header.domain)) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, from_raw); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "vector_slice(): from must be a number: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } if (from_raw->header.domain == GRN_DB_INT32) { from = GRN_INT32_VALUE(from_raw); } else if (from_raw->header.domain == GRN_DB_INT64) { from = GRN_INT64_VALUE(from_raw); } else { grn_obj buffer; grn_rc rc; GRN_INT64_INIT(&buffer, 0); rc = grn_obj_cast(ctx, from_raw, &buffer, GRN_FALSE); if (rc == GRN_SUCCESS) { from = GRN_INT64_VALUE(&buffer); } GRN_OBJ_FIN(ctx, &buffer); if (rc != GRN_SUCCESS) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, from_raw); GRN_PLUGIN_ERROR(ctx, rc, "vector_slice(): " "failed to cast from value to number: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } } if 
(length_raw) { if (!grn_type_id_is_number_family(ctx, length_raw->header.domain)) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, length_raw); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "vector_slice(): length must be a number: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } if (length_raw->header.domain == GRN_DB_INT32) { length = GRN_INT32_VALUE(length_raw); } else if (length_raw->header.domain == GRN_DB_INT64) { length = GRN_INT64_VALUE(length_raw); } else { grn_obj buffer; grn_rc rc; GRN_INT64_INIT(&buffer, 0); rc = grn_obj_cast(ctx, length_raw, &buffer, GRN_FALSE); if (rc == GRN_SUCCESS) { length = GRN_INT64_VALUE(&buffer); } GRN_OBJ_FIN(ctx, &buffer); if (rc != GRN_SUCCESS) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, length_raw); GRN_PLUGIN_ERROR(ctx, rc, "vector_slice(): " "failed to cast length value to number: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } } } slice = grn_plugin_proc_alloc(ctx, user_data, target->header.domain, GRN_OBJ_VECTOR); if (!slice) { return NULL; } if (target->header.flags & GRN_OBJ_WITH_WEIGHT) { slice->header.flags |= GRN_OBJ_WITH_WEIGHT; } if (length < 0) { length = size + length + 1; } if (length > size) { length = size; } if (length <= 0) { return slice; } while (from < 0) { from += size; } to = from + length; if (to > size) { to = size; } switch (target->header.type) { case GRN_VECTOR : { unsigned int i; for (i = from; i < to; i++) { const char *content; unsigned int content_length; unsigned int weight; grn_id domain; content_length = grn_vector_get_element(ctx, target, i, &content, &weight, &domain); grn_vector_add_element(ctx, slice, content, content_length, weight, domain); } } break; case GRN_PVECTOR : { unsigned int i; for (i = from; i < to; i++) { grn_obj *element = GRN_PTR_VALUE_AT(target, i); GRN_PTR_PUT(ctx, slice, 
element); } } break; case GRN_UVECTOR : { unsigned int i; for (i = from; i < to; i++) { grn_id id; unsigned int weight; id = grn_uvector_get_element(ctx, target, i, &weight); grn_uvector_add_element(ctx, slice, id, weight); } } break; } return slice; }