static grn_rc sequential_fuzzy_search(grn_ctx *ctx, grn_obj *table, grn_obj *column, grn_obj *query, uint32_t max_distance, uint32_t prefix_match_size, uint32_t max_expansion, int flags, grn_obj *res, grn_operator op) { grn_table_cursor *tc; char *sx = GRN_TEXT_VALUE(query); char *ex = GRN_BULK_CURR(query); if (op == GRN_OP_AND) { tc = grn_table_cursor_open(ctx, res, NULL, 0, NULL, 0, 0, -1, GRN_CURSOR_BY_ID); } else { tc = grn_table_cursor_open(ctx, table, NULL, 0, NULL, 0, 0, -1, GRN_CURSOR_BY_ID); } if (tc) { grn_id id; grn_obj value; score_heap *heap; int i, n; GRN_TEXT_INIT(&value, 0); heap = score_heap_open(ctx, SCORE_HEAP_SIZE); if (!heap) { grn_table_cursor_close(ctx, tc); grn_obj_unlink(ctx, &value); return GRN_NO_MEMORY_AVAILABLE; } while ((id = grn_table_cursor_next(ctx, tc))) { unsigned int distance = 0; grn_obj *domain; GRN_BULK_REWIND(&value); grn_obj_get_value(ctx, column, id, &value); domain = grn_ctx_at(ctx, ((&value))->header.domain); if ((&(value))->header.type == GRN_VECTOR) { n = grn_vector_size(ctx, &value); for (i = 0; i < n; i++) { unsigned int length; const char *vector_value = NULL; length = grn_vector_get_element(ctx, &value, i, &vector_value, NULL, NULL); if (!prefix_match_size || (prefix_match_size > 0 && length >= prefix_match_size && !memcmp(sx, vector_value, prefix_match_size))) { distance = calc_edit_distance(ctx, sx, ex, (char *)vector_value, (char *)vector_value + length, flags); if (distance <= max_distance) { score_heap_push(ctx, heap, id, distance); break; } } } } else if ((&(value))->header.type == GRN_UVECTOR && grn_obj_is_table(ctx, domain)) { n = grn_vector_size(ctx, &value); for (i = 0; i < n; i++) { grn_id rid; char key_name[GRN_TABLE_MAX_KEY_SIZE]; int key_length; rid = grn_uvector_get_element(ctx, &value, i, NULL); key_length = grn_table_get_key(ctx, domain, rid, key_name, GRN_TABLE_MAX_KEY_SIZE); if (!prefix_match_size || (prefix_match_size > 0 && key_length >= prefix_match_size && !memcmp(sx, key_name, 
prefix_match_size))) { distance = calc_edit_distance(ctx, sx, ex, key_name, key_name + key_length, flags); if (distance <= max_distance) { score_heap_push(ctx, heap, id, distance); break; } } } } else { if (grn_obj_is_reference_column(ctx, column)) { grn_id rid; char key_name[GRN_TABLE_MAX_KEY_SIZE]; int key_length; rid = GRN_RECORD_VALUE(&value); key_length = grn_table_get_key(ctx, domain, rid, key_name, GRN_TABLE_MAX_KEY_SIZE); if (!prefix_match_size || (prefix_match_size > 0 && key_length >= prefix_match_size && !memcmp(sx, key_name, prefix_match_size))) { distance = calc_edit_distance(ctx, sx, ex, key_name, key_name + key_length, flags); if (distance <= max_distance) { score_heap_push(ctx, heap, id, distance); } } } else { if (!prefix_match_size || (prefix_match_size > 0 && GRN_TEXT_LEN(&value) >= prefix_match_size && !memcmp(sx, GRN_TEXT_VALUE(&value), prefix_match_size))) { distance = calc_edit_distance(ctx, sx, ex, GRN_TEXT_VALUE(&value), GRN_BULK_CURR(&value), flags); if (distance <= max_distance) { score_heap_push(ctx, heap, id, distance); } } } } grn_obj_unlink(ctx, domain); } grn_table_cursor_close(ctx, tc); grn_obj_unlink(ctx, &value); for (i = 0; i < heap->n_entries; i++) { if (max_expansion > 0 && i >= max_expansion) { break; } { grn_posting posting; posting.rid = heap->nodes[i].id; posting.sid = 1; posting.pos = 0; posting.weight = max_distance - heap->nodes[i].score; grn_ii_posting_add(ctx, &posting, (grn_hash *)res, op); } } grn_ii_resolve_sel_and(ctx, (grn_hash *)res, op); score_heap_close(ctx, heap); } return GRN_SUCCESS; }
/*
 * Gets the value of a variable size column for the record whose ID
 * is _id_.
 *
 * @example Gets weight vector value
 *    Groonga::Schema.define do |schema|
 *      schema.create_table("Products",
 *                          :type => :patricia_trie,
 *                          :key_type => "ShortText") do |table|
 *        # This is weight vector.
 *        # ":with_weight => true" is important to store weight value.
 *        table.short_text("tags",
 *                         :type => :vector,
 *                         :with_weight => true)
 *      end
 *    end
 *
 *    products = Groonga["Products"]
 *    rroonga = products.add("Rroonga")
 *    rroonga.tags = [
 *      {
 *        :value  => "ruby",
 *        :weight => 100,
 *      },
 *      {
 *        :value  => "groonga",
 *        :weight => 10,
 *      },
 *    ]
 *
 *    p rroonga.tags
 *    # => [
 *    #      {:value => "ruby",    :weight => 100},
 *    #      {:value => "groonga", :weight => 10}
 *    #    ]
 *
 * @overload [](id)
 *   @param [Integer, Record] id The record ID.
 *   @return [Array<Hash<Symbol, String>>] An array of values if the
 *     column is a weight vector column.
 *     Each value is a Hash of the following form:
 *
 *     <pre>
 *     {
 *       :value  => [KEY],
 *       :weight => [WEIGHT],
 *     }
 *     </pre>
 *
 *     @[KEY]@ is the key of the table that is specified as range on
 *     creating the weight vector.
 *
 *     @[WEIGHT]@ is a positive integer.
 *
 *   @return [::Object] See {Groonga::Object#[]} for columns except
 *     weight vector column.
 *
 *   @since 4.0.1.
 */
static VALUE
rb_grn_variable_size_column_array_reference (VALUE self, VALUE rb_id)
{
    grn_ctx *context = NULL;
    grn_obj *column;
    grn_obj *range;
    grn_obj *value;
    grn_id record_id;
    VALUE rb_range;
    VALUE rb_elements;
    unsigned int n_elements;
    unsigned int index;

    rb_grn_variable_size_column_deconstruct(SELF(self), &column, &context,
                                            NULL, NULL, &value, NULL,
                                            NULL, &range);

    /* Columns without weight are handled by the parent class. */
    if ((column->header.flags & GRN_OBJ_WITH_WEIGHT) == 0) {
        return rb_call_super(1, &rb_id);
    }

    record_id = RVAL2GRNID(rb_id, context, range, self);

    /* Reuse the shared value buffer as a vector of the same domain. */
    grn_obj_reinit(context, value,
                   value->header.domain,
                   value->header.flags | GRN_OBJ_VECTOR);
    grn_obj_get_value(context, column, record_id, value);
    rb_grn_context_check(context, self);

    rb_range = GRNTABLE2RVAL(context, range, GRN_FALSE);
    n_elements = grn_vector_size(context, value);
    rb_elements = rb_ary_new2(n_elements);

    for (index = 0; index < n_elements; index++) {
        unsigned int weight = 0;
        VALUE rb_element_value;
        VALUE rb_element;

        if (value->header.type == GRN_UVECTOR) {
            /* Reference vector: wrap the referenced ID as a record. */
            grn_id element_id;

            element_id = grn_uvector_get_element(context, value, index,
                                                 &weight);
            rb_element_value = rb_grn_record_new(rb_range, element_id, Qnil);
        } else {
            /* Any other vector: expose the raw element bytes as a String. */
            const char *raw;
            unsigned int raw_length;
            grn_id element_domain;

            raw_length = grn_vector_get_element(context, value, index,
                                                &raw, &weight,
                                                &element_domain);
            rb_element_value = rb_str_new(raw, raw_length);
        }

        rb_element = rb_hash_new();
        rb_hash_aset(rb_element,
                     ID2SYM(rb_intern("value")),
                     rb_element_value);
        rb_hash_aset(rb_element,
                     ID2SYM(rb_intern("weight")),
                     UINT2NUM(weight));
        rb_ary_push(rb_elements, rb_element);
    }

    return rb_elements;
}
/*
 * Replaces tag IDs in the value being stored by their synonym IDs.
 *
 * Four objects are popped from ctx, in this order: flags, the new
 * value, the old value and the record ID (unused).  Nothing is done
 * when the new value is empty or when flags is 0.
 *
 * The table is resolved from the domain of the old value and its
 * synonym column (SYNONYM_COLUMN_NAME) is looked up.  For each tag ID
 * found in the new value, the synonym column is read as UInt32; a
 * non-zero value replaces the tag ID.
 *
 * The new value is handled according to its shape:
 *   - string domain: the text is tokenized against the table (adding
 *     missing records) and newvalue is rebuilt as a record vector.
 *   - GRN_UVECTOR: every element is mapped and newvalue is rebuilt.
 *   - otherwise: newvalue is treated as a single record ID and is
 *     rewritten only when a synonym exists.
 *
 * nargs, args and user_data are unused.
 *
 * Always returns NULL.
 */
static grn_obj *
command_tag_synonym(grn_ctx *ctx, GNUC_UNUSED int nargs,
                    GNUC_UNUSED grn_obj **args,
                    GNUC_UNUSED grn_user_data *user_data)
{
  grn_obj *flags = grn_ctx_pop(ctx);
  grn_obj *newvalue = grn_ctx_pop(ctx);
  grn_obj *oldvalue = grn_ctx_pop(ctx);
  GNUC_UNUSED grn_obj *id = grn_ctx_pop(ctx);
  grn_obj record;
  grn_obj *domain;
  grn_obj *table;
  grn_obj *column;
  int is_text;
  int is_uvector;
  int i, n;

  if (GRN_BULK_VSIZE(newvalue) == 0 || GRN_INT32_VALUE(flags) == 0) {
    return NULL;
  }

  table = grn_ctx_at(ctx, oldvalue->header.domain);
  if (table && !is_table(table)) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING,
                   "[tag-synonym] "
                   "hooked column must be reference type");
    return NULL;
  }

  column = grn_obj_column(ctx, table,
                          SYNONYM_COLUMN_NAME,
                          SYNONYM_COLUMN_NAME_LEN);
  if (!column) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING,
                   "[tag-synonym] "
                   "couldn't open synonym column");
    return NULL;
  }

  domain = grn_ctx_at(ctx, newvalue->header.domain);
  /* Guard against a NULL domain once, for both uses below. */
  is_text = (domain && is_string(domain));
  is_uvector = (newvalue->header.type == GRN_UVECTOR);

  if (is_text) {
    GRN_RECORD_INIT(&record, GRN_OBJ_VECTOR, oldvalue->header.domain);
    grn_table_tokenize(ctx, table,
                       GRN_TEXT_VALUE(newvalue), GRN_TEXT_LEN(newvalue),
                       &record, GRN_TRUE);
  } else if (is_uvector) {
    /* Shallow copy: record aliases the current contents of newvalue. */
    record = *newvalue;
  }

  if (is_text || is_uvector) {
    grn_obj value;

    GRN_RECORD_INIT(newvalue, GRN_OBJ_VECTOR, oldvalue->header.domain);
    GRN_UINT32_INIT(&value, 0);
    n = grn_vector_size(ctx, &record);
    for (i = 0; i < n; i++) {
      grn_id tid;

      tid = grn_uvector_get_element(ctx, &record, i, NULL);
      GRN_BULK_REWIND(&value);
      grn_obj_get_value(ctx, column, tid, &value);
      if (GRN_UINT32_VALUE(&value)) {
        GRN_PLUGIN_LOG(ctx, GRN_LOG_INFO,
                       "[tag-synonym] "
                       "changed: tid %d -> %d", tid, GRN_UINT32_VALUE(&value));
        tid = GRN_UINT32_VALUE(&value);
      }
      grn_uvector_add_element(ctx, newvalue, tid, 0);
    }
    grn_obj_unlink(ctx, &value);
    if (is_text) {
      /*
       * record was initialized and filled locally, so it is released
       * here.  In the uvector case record is only a shallow copy of the
       * caller's object and is left alone.
       * NOTE(review): confirm who owns the previous buffer of newvalue
       * in the uvector case.
       */
      grn_obj_unlink(ctx, &record);
    }
  } else {
    grn_id tid;
    grn_obj value;

    tid = GRN_RECORD_VALUE(newvalue);
    GRN_UINT32_INIT(&value, 0);
    grn_obj_get_value(ctx, column, tid, &value);
    if (GRN_UINT32_VALUE(&value)) {
      GRN_PLUGIN_LOG(ctx, GRN_LOG_INFO,
                     "[tag-synonym] "
                     "changed: tid %d -> %d", tid, GRN_UINT32_VALUE(&value));
      tid = GRN_UINT32_VALUE(&value);
      GRN_BULK_REWIND(newvalue);
      GRN_RECORD_SET(ctx, newvalue, tid);
    }
    grn_obj_unlink(ctx, &value);
  }

  return NULL;
}
/*
 * Tests whether the key of any record referenced by `uvector` matches
 * the regular expression `pattern`.
 *
 * Each element of `uvector` is taken as a record ID in the object
 * resolved from the uvector's domain, and the record key is matched
 * against the pattern.  When the domain reports a normalizer the key is
 * matched as is; otherwise the key is first normalized with the
 * normalizer named GRN_NORMALIZER_AUTO_NAME.  Elements whose key is
 * empty are skipped.
 *
 * Returns GRN_TRUE as soon as one key matches.  Returns GRN_FALSE when
 * nothing matches, the uvector is empty, the pattern cannot be
 * compiled, the domain cannot be resolved, or regular expression
 * support is not compiled in.
 */
static grn_bool
exec_regexp_uvector_bulk(grn_ctx *ctx, grn_obj *uvector, grn_obj *pattern)
{
#ifdef GRN_SUPPORT_REGEXP
  grn_bool matched = GRN_FALSE;
  unsigned int i, size;
  OnigRegex regex;
  grn_obj *domain;
  /*
   * Initialized so that the checks below are well defined even if
   * grn_table_get_info() does not store a value.
   */
  grn_obj *normalizer = NULL;
  grn_obj *normalizer_auto = NULL;

  size = grn_uvector_size(ctx, uvector);
  if (size == 0) {
    return GRN_FALSE;
  }

  regex = grn_onigmo_new(ctx,
                         GRN_TEXT_VALUE(pattern),
                         GRN_TEXT_LEN(pattern),
                         GRN_ONIGMO_OPTION_DEFAULT,
                         GRN_ONIGMO_SYNTAX_DEFAULT,
                         "[operator]");
  if (!regex) {
    return GRN_FALSE;
  }

  domain = grn_ctx_at(ctx, uvector->header.domain);
  if (!domain) {
    onig_free(regex);
    return GRN_FALSE;
  }

  grn_table_get_info(ctx, domain, NULL, NULL, NULL, &normalizer, NULL);
  if (!normalizer) {
    normalizer_auto = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
  }

  for (i = 0; i < size; i++) {
    grn_id record_id;
    char key[GRN_TABLE_MAX_KEY_SIZE];
    int key_size;

    record_id = grn_uvector_get_element(ctx, uvector, i, NULL);
    key_size = grn_table_get_key(ctx, domain, record_id,
                                 key, GRN_TABLE_MAX_KEY_SIZE);
    if (key_size == 0) {
      continue;
    }

    if (normalizer) {
      matched = regexp_is_match(ctx, regex, key, key_size);
    } else {
      grn_obj *norm_key;
      const char *norm_key_raw = NULL;
      unsigned int norm_key_raw_length_in_bytes = 0;

      norm_key = grn_string_open(ctx, key, key_size, normalizer_auto, 0);
      if (!norm_key) {
        /* Nothing to match against for this element. */
        continue;
      }
      grn_string_get_normalized(ctx, norm_key,
                                &norm_key_raw,
                                &norm_key_raw_length_in_bytes,
                                NULL);
      matched = regexp_is_match(ctx, regex,
                                norm_key_raw,
                                norm_key_raw_length_in_bytes);
      grn_obj_unlink(ctx, norm_key);
    }

    if (matched) {
      break;
    }
  }

  if (normalizer_auto) {
    grn_obj_unlink(ctx, normalizer_auto);
  }
  grn_obj_unlink(ctx, domain);
  onig_free(regex);

  return matched;
#else /* GRN_SUPPORT_REGEXP */
  return GRN_FALSE;
#endif /* GRN_SUPPORT_REGEXP */
}
static grn_obj * func_vector_slice(grn_ctx *ctx, int n_args, grn_obj **args, grn_user_data *user_data) { grn_obj *target; grn_obj *from_raw = NULL; grn_obj *length_raw = NULL; int64_t from = 0; int64_t length = -1; uint32_t to = 0; uint32_t size = 0; grn_obj *slice; if (n_args < 2 || n_args > 3) { GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "vector_slice(): wrong number of arguments (%d for 2..3)", n_args); return NULL; } target = args[0]; from_raw = args[1]; if (n_args == 3) { length_raw = args[2]; } switch (target->header.type) { case GRN_VECTOR : case GRN_PVECTOR : case GRN_UVECTOR : size = grn_vector_size(ctx, target); break; default : { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, target, &inspected); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "vector_slice(): target object must be vector: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } break; } if (!grn_type_id_is_number_family(ctx, from_raw->header.domain)) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, from_raw); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "vector_slice(): from must be a number: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } if (from_raw->header.domain == GRN_DB_INT32) { from = GRN_INT32_VALUE(from_raw); } else if (from_raw->header.domain == GRN_DB_INT64) { from = GRN_INT64_VALUE(from_raw); } else { grn_obj buffer; grn_rc rc; GRN_INT64_INIT(&buffer, 0); rc = grn_obj_cast(ctx, from_raw, &buffer, GRN_FALSE); if (rc == GRN_SUCCESS) { from = GRN_INT64_VALUE(&buffer); } GRN_OBJ_FIN(ctx, &buffer); if (rc != GRN_SUCCESS) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, from_raw); GRN_PLUGIN_ERROR(ctx, rc, "vector_slice(): " "failed to cast from value to number: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } } if 
(length_raw) { if (!grn_type_id_is_number_family(ctx, length_raw->header.domain)) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, length_raw); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "vector_slice(): length must be a number: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } if (length_raw->header.domain == GRN_DB_INT32) { length = GRN_INT32_VALUE(length_raw); } else if (length_raw->header.domain == GRN_DB_INT64) { length = GRN_INT64_VALUE(length_raw); } else { grn_obj buffer; grn_rc rc; GRN_INT64_INIT(&buffer, 0); rc = grn_obj_cast(ctx, length_raw, &buffer, GRN_FALSE); if (rc == GRN_SUCCESS) { length = GRN_INT64_VALUE(&buffer); } GRN_OBJ_FIN(ctx, &buffer); if (rc != GRN_SUCCESS) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, length_raw); GRN_PLUGIN_ERROR(ctx, rc, "vector_slice(): " "failed to cast length value to number: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } } } slice = grn_plugin_proc_alloc(ctx, user_data, target->header.domain, GRN_OBJ_VECTOR); if (!slice) { return NULL; } if (target->header.flags & GRN_OBJ_WITH_WEIGHT) { slice->header.flags |= GRN_OBJ_WITH_WEIGHT; } if (length < 0) { length = size + length + 1; } if (length > size) { length = size; } if (length <= 0) { return slice; } while (from < 0) { from += size; } to = from + length; if (to > size) { to = size; } switch (target->header.type) { case GRN_VECTOR : { unsigned int i; for (i = from; i < to; i++) { const char *content; unsigned int content_length; unsigned int weight; grn_id domain; content_length = grn_vector_get_element(ctx, target, i, &content, &weight, &domain); grn_vector_add_element(ctx, slice, content, content_length, weight, domain); } } break; case GRN_PVECTOR : { unsigned int i; for (i = from; i < to; i++) { grn_obj *element = GRN_PTR_VALUE_AT(target, i); GRN_PTR_PUT(ctx, slice, 
element); } } break; case GRN_UVECTOR : { unsigned int i; for (i = from; i < to; i++) { grn_id id; unsigned int weight; id = grn_uvector_get_element(ctx, target, i, &weight); grn_uvector_add_element(ctx, slice, id, weight); } } break; } return slice; }