static int hash_element_to_vector_element(VALUE key, VALUE value, VALUE user_data) { HashElementToVectorElementData *data = (HashElementToVectorElementData *)user_data; unsigned int weight; weight = NUM2UINT(value); if (data->vector->header.type == GRN_UVECTOR) { grn_id id = RVAL2GRNID(key, data->context, data->range, data->self); grn_uvector_add_element(data->context, data->vector, id, weight); } else { GRN_BULK_REWIND(data->element_value); RVAL2GRNBULK(key, data->context, data->element_value); grn_vector_add_element(data->context, data->vector, GRN_BULK_HEAD(data->element_value), GRN_BULK_VSIZE(data->element_value), weight, data->element_value->header.domain); } return ST_CONTINUE; }
/* * It updates a value of variable size column value for the record * that ID is _id_. * * Weight vector column is a special variable size column. This * description describes only weight vector column. Other variable * size column works what you think. * * @example Use weight vector as matrix search result weight * Groonga::Schema.define do |schema| * schema.create_table("Products", * :type => :patricia_trie, * :key_type => "ShortText") do |table| * # This is weight vector. * # ":with_weight => true" is important for matrix search result weight. * table.short_text("tags", * :type => :vector, * :with_weight => true) * end * * schema.create_table("Tags", * :type => :hash, * :key_type => "ShortText") do |table| * # This is inverted index. It also needs ":with_weight => true". * table.index("Products.tags", :with_weight => true) * end * end * * products = Groonga["Products"] * groonga = products.add("Groonga") * groonga.tags = [ * { * :value => "groonga", * :weight => 100, * }, * ] * rroonga = products.add("Rroonga") * rroonga.tags = [ * { * :value => "ruby", * :weight => 100, * }, * { * :value => "groonga", * :weight => 10, * }, * ] * * result = products.select do |record| * # Search by "groonga" * record.match("groonga") do |match_target| * match_target.tags * end * end * * result.each do |record| * p [record.key.key, record.score] * end * # Matches all records with weight. * # => ["Groonga", 101] * # ["Rroonga", 11] * * # Increases score for "ruby" 10 times * products.select(# The previous search result. Required. * :result => result, * # It just adds score to existing records in the result. Required. * :operator => Groonga::Operator::ADJUST) do |record| * record.match("ruby") do |target| * target.tags * 10 # 10 times * end * end * * result.each do |record| * p [record.key.key, record.score] * end * # Weight is used for increasing score. * # => ["Groonga", 101] <- Not changed. * # ["Rroonga", 1021] <- 1021 (= 101 * 10 + 1) increased. * * @overload []=(id, elements) * This description is for weight vector column. * * @param [Integer, Record] id The record ID. * @param [Array<Hash<Symbol, String>>] elements An array of values * for weight vector. * Each value is a Hash like the following form: * * <pre> * { * :value => [KEY], * :weight => [WEIGHT], * } * </pre> * * @[KEY]@ must be the same type of the key of the table that is * specified as range on creating the weight vector. * * @[WEIGHT]@ must be an positive integer. Note that search * becomes @weight + 1@. It means that You want to get 10 as * score, you should set 9 as weight. * * @overload []=(id, value) * This description is for variable size columns except weight * vector column. * * @param [Integer, Record] id The record ID. * @param [::Object] value A new value. * @see Groonga::Object#[]= * * @since 4.0.1 */ static VALUE rb_grn_variable_size_column_array_set (VALUE self, VALUE rb_id, VALUE rb_value) { grn_ctx *context = NULL; grn_obj *column, *range; grn_rc rc; grn_id id; grn_obj *value, *element_value; int flags = GRN_OBJ_SET; rb_grn_variable_size_column_deconstruct(SELF(self), &column, &context, NULL, NULL, &value, &element_value, NULL, &range); if (!(column->header.flags & GRN_OBJ_WITH_WEIGHT)) { VALUE args[2]; args[0] = rb_id; args[1] = rb_value; return rb_call_super(2, args); } id = RVAL2GRNID(rb_id, context, range, self); grn_obj_reinit(context, value, value->header.domain, value->header.flags | GRN_OBJ_VECTOR); value->header.flags |= GRN_OBJ_WITH_WEIGHT; if (RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cArray))) { int i, n; n = RARRAY_LEN(rb_value); for (i = 0; i < n; i++) { unsigned int weight = 0; VALUE rb_element_value, rb_weight; rb_grn_scan_options(RARRAY_PTR(rb_value)[i], "value", &rb_element_value, "weight", &rb_weight, NULL); if (!NIL_P(rb_weight)) { weight = NUM2UINT(rb_weight); } if (value->header.type == GRN_UVECTOR) { grn_id id = RVAL2GRNID(rb_element_value, context, range, self); grn_uvector_add_element(context, value, id, weight); } else { GRN_BULK_REWIND(element_value); if (!NIL_P(rb_element_value)) { RVAL2GRNBULK(rb_element_value, context, element_value); } grn_vector_add_element(context, value, GRN_BULK_HEAD(element_value), GRN_BULK_VSIZE(element_value), weight, element_value->header.domain); } } } else if (RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cHash))) { HashElementToVectorElementData data; data.self = self; data.context = context; data.vector = value; data.element_value = element_value; data.range = range; rb_hash_foreach(rb_value, hash_element_to_vector_element, (VALUE)&data); } else { rb_raise(rb_eArgError, "<%s>: " "weight vector value must be an array of index value or " "a hash that key is vector value and value is vector weight: " "<%s>", rb_grn_inspect(self), rb_grn_inspect(rb_value)); } rc = grn_obj_set_value(context, column, id, value, flags); rb_grn_context_check(context, self); rb_grn_rc_check(rc, self); return rb_value; }
static grn_obj * command_tag_synonym(grn_ctx *ctx, GNUC_UNUSED int nargs, GNUC_UNUSED grn_obj **args, GNUC_UNUSED grn_user_data *user_data) { GNUC_UNUSED grn_obj *flags = grn_ctx_pop(ctx); grn_obj *newvalue = grn_ctx_pop(ctx); grn_obj *oldvalue = grn_ctx_pop(ctx); GNUC_UNUSED grn_obj *id = grn_ctx_pop(ctx); grn_obj buf; grn_obj record; grn_obj *domain; grn_obj *table; grn_obj *column; int i,n; if (GRN_BULK_VSIZE(newvalue) == 0 || GRN_INT32_VALUE(flags) == 0) { return NULL; } table = grn_ctx_at(ctx, oldvalue->header.domain); if (table && !is_table(table)) { GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING, "[tag-synonym] " "hooked column must be reference type"); return NULL; } column = grn_obj_column(ctx, table, SYNONYM_COLUMN_NAME, SYNONYM_COLUMN_NAME_LEN); if (!column) { GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING, "[tag-synonym] " "couldn't open synonym column"); return NULL; } GRN_TEXT_INIT(&buf, 0); domain = grn_ctx_at(ctx, newvalue->header.domain); if (domain && is_string(domain)) { GRN_RECORD_INIT(&record, GRN_OBJ_VECTOR, oldvalue->header.domain); grn_table_tokenize(ctx, table, GRN_TEXT_VALUE(newvalue), GRN_TEXT_LEN(newvalue), &record, GRN_TRUE); } else if (newvalue->header.type == GRN_UVECTOR) { record = *newvalue; } if (is_string(domain) || newvalue->header.type == GRN_UVECTOR) { grn_obj value; GRN_RECORD_INIT(newvalue, GRN_OBJ_VECTOR, oldvalue->header.domain); GRN_UINT32_INIT(&value, 0); n = grn_vector_size(ctx, &record); for (i = 0; i < n; i++) { grn_id tid; tid = grn_uvector_get_element(ctx, &record, i, NULL); GRN_BULK_REWIND(&value); grn_obj_get_value(ctx, column, tid, &value); if (GRN_UINT32_VALUE(&value)) { GRN_PLUGIN_LOG(ctx, GRN_LOG_INFO, "[tag-synonym] " "changed: tid %d -> %d", tid, GRN_UINT32_VALUE(&value)); tid = GRN_UINT32_VALUE(&value); } grn_uvector_add_element(ctx, newvalue, tid, 0); } grn_obj_unlink(ctx, &value); } else { grn_id tid; grn_obj value; tid = GRN_RECORD_VALUE(newvalue); GRN_UINT32_INIT(&value, 0); grn_obj_get_value(ctx, column, tid, &value); if (GRN_UINT32_VALUE(&value)) { GRN_PLUGIN_LOG(ctx, GRN_LOG_INFO, "[tag-synonym] " "changed: tid %d -> %d", tid, GRN_UINT32_VALUE(&value)); tid = GRN_UINT32_VALUE(&value); GRN_BULK_REWIND(newvalue); GRN_RECORD_SET(ctx, newvalue, tid); } grn_obj_unlink(ctx, &value); } grn_obj_unlink(ctx, &buf); return NULL; }
static grn_obj * func_vector_slice(grn_ctx *ctx, int n_args, grn_obj **args, grn_user_data *user_data) { grn_obj *target; grn_obj *from_raw = NULL; grn_obj *length_raw = NULL; int64_t from = 0; int64_t length = -1; uint32_t to = 0; uint32_t size = 0; grn_obj *slice; if (n_args < 2 || n_args > 3) { GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "vector_slice(): wrong number of arguments (%d for 2..3)", n_args); return NULL; } target = args[0]; from_raw = args[1]; if (n_args == 3) { length_raw = args[2]; } switch (target->header.type) { case GRN_VECTOR : case GRN_PVECTOR : case GRN_UVECTOR : size = grn_vector_size(ctx, target); break; default : { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, target, &inspected); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "vector_slice(): target object must be vector: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } break; } if (!grn_type_id_is_number_family(ctx, from_raw->header.domain)) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, from_raw); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "vector_slice(): from must be a number: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } if (from_raw->header.domain == GRN_DB_INT32) { from = GRN_INT32_VALUE(from_raw); } else if (from_raw->header.domain == GRN_DB_INT64) { from = GRN_INT64_VALUE(from_raw); } else { grn_obj buffer; grn_rc rc; GRN_INT64_INIT(&buffer, 0); rc = grn_obj_cast(ctx, from_raw, &buffer, GRN_FALSE); if (rc == GRN_SUCCESS) { from = GRN_INT64_VALUE(&buffer); } GRN_OBJ_FIN(ctx, &buffer); if (rc != GRN_SUCCESS) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, from_raw); GRN_PLUGIN_ERROR(ctx, rc, "vector_slice(): " "failed to cast from value to number: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } } if (length_raw) { if (!grn_type_id_is_number_family(ctx, length_raw->header.domain)) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, length_raw); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "vector_slice(): length must be a number: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } if (length_raw->header.domain == GRN_DB_INT32) { length = GRN_INT32_VALUE(length_raw); } else if (length_raw->header.domain == GRN_DB_INT64) { length = GRN_INT64_VALUE(length_raw); } else { grn_obj buffer; grn_rc rc; GRN_INT64_INIT(&buffer, 0); rc = grn_obj_cast(ctx, length_raw, &buffer, GRN_FALSE); if (rc == GRN_SUCCESS) { length = GRN_INT64_VALUE(&buffer); } GRN_OBJ_FIN(ctx, &buffer); if (rc != GRN_SUCCESS) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, length_raw); GRN_PLUGIN_ERROR(ctx, rc, "vector_slice(): " "failed to cast length value to number: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); return NULL; } } } slice = grn_plugin_proc_alloc(ctx, user_data, target->header.domain, GRN_OBJ_VECTOR); if (!slice) { return NULL; } if (target->header.flags & GRN_OBJ_WITH_WEIGHT) { slice->header.flags |= GRN_OBJ_WITH_WEIGHT; } if (length < 0) { length = size + length + 1; } if (length > size) { length = size; } if (length <= 0) { return slice; } while (from < 0) { from += size; } to = from + length; if (to > size) { to = size; } switch (target->header.type) { case GRN_VECTOR : { unsigned int i; for (i = from; i < to; i++) { const char *content; unsigned int content_length; unsigned int weight; grn_id domain; content_length = grn_vector_get_element(ctx, target, i, &content, &weight, &domain); grn_vector_add_element(ctx, slice, content, content_length, weight, domain); } } break; case GRN_PVECTOR : { unsigned int i; for (i = from; i < to; i++) { grn_obj *element = GRN_PTR_VALUE_AT(target, i); GRN_PTR_PUT(ctx, slice, element); } } break; case GRN_UVECTOR : { unsigned int i; for (i = from; i < to; i++) { grn_id id; unsigned int weight; id = grn_uvector_get_element(ctx, target, i, &weight); grn_uvector_add_element(ctx, slice, id, weight); } } break; } return slice; }