/*
 * Converts the Ruby object rb_key into the groonga object key.
 *
 * - When domain is NULL, the result of RVAL2GRNBULK() is returned as is.
 * - When domain is a GRN_TYPE, the result of RVAL2GRNBULK_WITH_TYPE()
 *   is returned as is.
 * - When domain is a table (GRN_TABLE_HASH_KEY, GRN_TABLE_PAT_KEY,
 *   GRN_TABLE_DAT_KEY or GRN_TABLE_NO_KEY), rb_key is resolved to a
 *   record ID with RVAL2GRNID() and the raw bytes of that ID are
 *   stored into key.
 * - For any other domain, rb_key must be an Integer and is used as the
 *   ID itself; rb_eGrnError is raised otherwise.
 *
 * related_object is only used for ID resolution and for the error
 * message.
 */
grn_obj *
rb_grn_key_from_ruby_object (VALUE rb_key, grn_ctx *context,
                             grn_obj *key, grn_id domain_id,
                             grn_obj *domain, VALUE related_object)
{
    grn_id record_id;

    if (!domain) {
        return RVAL2GRNBULK(rb_key, context, key);
    }

    if (domain->header.type == GRN_TYPE) {
        return RVAL2GRNBULK_WITH_TYPE(rb_key, context, key,
                                      domain_id, domain);
    }

    switch (domain->header.type) {
    case GRN_TABLE_HASH_KEY:
    case GRN_TABLE_PAT_KEY:
    case GRN_TABLE_DAT_KEY:
    case GRN_TABLE_NO_KEY:
        record_id = RVAL2GRNID(rb_key, context, domain, related_object);
        break;
    default:
        if (!RVAL2CBOOL(rb_obj_is_kind_of(rb_key, rb_cInteger))) {
            rb_raise(rb_eGrnError,
                     "should be unsigned integer: <%s>: <%s>",
                     rb_grn_inspect(rb_key),
                     rb_grn_inspect(related_object));
        }
        record_id = NUM2UINT(rb_key);
        break;
    }

    /* The key is the binary representation of the record ID. */
    GRN_TEXT_SET(context, key, &record_id, sizeof(record_id));
    return key;
}
/*
 * rb_hash_foreach() callback that appends one element to a weight
 * vector.
 *
 * key is the element value, value is its weight and user_data points
 * to a HashElementToVectorElementData. For a GRN_UVECTOR the key is
 * resolved to a record ID; otherwise the key is converted into the
 * scratch bulk data->element_value and its bytes are appended.
 *
 * Always returns ST_CONTINUE so that iteration goes on.
 */
static int
hash_element_to_vector_element (VALUE key, VALUE value, VALUE user_data)
{
    HashElementToVectorElementData *data =
        (HashElementToVectorElementData *)user_data;
    grn_ctx *context = data->context;
    grn_obj *vector = data->vector;
    grn_obj *element;
    unsigned int weight = NUM2UINT(value);

    if (vector->header.type == GRN_UVECTOR) {
        grn_id element_id = RVAL2GRNID(key, context, data->range, data->self);
        grn_uvector_add_element(context, vector, element_id, weight);
        return ST_CONTINUE;
    }

    /* Reuse the scratch bulk for every element. */
    element = data->element_value;
    GRN_BULK_REWIND(element);
    RVAL2GRNBULK(key, context, element);
    grn_vector_add_element(context, vector,
                           GRN_BULK_HEAD(element),
                           GRN_BULK_VSIZE(element),
                           weight,
                           element->header.domain);
    return ST_CONTINUE;
}
/*
 * Creates an inverted index so that the record whose ID is _id_ can
 * be found quickly by full text search. In most cases the index for
 * full text search is updated automatically by specifying the
 * +:source+ option of {Groonga::Table#define_index_column}, so this
 * method is rarely used explicitly.
 *
 * @example Creates an index for each paragraph of an article.
 *   articles = Groonga::Array.create(:name => "<articles>")
 *   articles.define_column("title", "ShortText")
 *   articles.define_column("content", "Text")
 *
 *   terms = Groonga::Hash.create(:name => "<terms>",
 *                                :default_tokenizer => "TokenBigram")
 *   content_index = terms.define_index_column("content", articles,
 *                                             :with_section => true)
 *
 *   content = <<-EOC
 *   groonga is an embeddable full text search engine library.
 *   It can enhance the full text search features of a DBMS or a
 *   scripting language runtime by being embedded into it. It also
 *   includes a data store based on the relational model, so groonga
 *   can be used by itself as a fast data store server.
 *
 *   * Full text search method
 *   groonga is a full text search engine based on an inverted index.
 *   The inverted index is compressed and stored in a file, and is
 *   designed to keep the amount of disk reads at search time small
 *   and localized. You can choose one of the following index types
 *   depending on your use.
 *   EOC
 *
 *   groonga = articles.add(:title => "groonga", :content => content)
 *
 *   content.split(/\n{2,}/).each_with_index do |sentence, i|
 *     content_index[groonga] = {:value => sentence, :section => i + 1}
 *   end
 *
 *   content_index.search("engine").collect do |record|
 *     p record.key["title"] # -> "groonga"
 *   end
 *
 * @overload []=(id, value)
 *   @param [String] value The new value.
 * @overload []=(id, options)
 *   Specifying _options_ gives finer control over index creation
 *   than specifying _value_.
 *   @param [::Hash] options The name and value
 *     pairs. Omitted names are initialized as the default value
 *   @option options :section
 *     The section number. It is treated as 1 when omitted.
 *     @{:with_section => true}@ must have been specified in
 *     {Groonga::Table#define_index_column}.
 *   @option options :old_value
 *     The previous value. The current value is used when omitted.
 *     Normally you don't need to specify it.
 *   @option options :value
 *     The new value. Specifying _value_ behaves the same as
 *     specifying @{:value => value}@ in _options_.
 *
 * @deprecated Since 3.0.2. Use {#add}, {#delete} or {#update} instead.
 */
static VALUE
rb_grn_index_column_array_set (VALUE self, VALUE rb_id, VALUE rb_value)
{
    grn_ctx *context = NULL;
    grn_obj *column, *range;
    grn_obj *old_value, *new_value;
    grn_id id;
    grn_rc rc;
    unsigned int section;
    VALUE rb_options = rb_value;
    VALUE rb_section, rb_old_value, rb_new_value;

    rb_grn_index_column_deconstruct(SELF(self), &column, &context,
                                    NULL, NULL,
                                    &new_value, &old_value,
                                    NULL, &range,
                                    NULL, NULL);

    id = RVAL2GRNID(rb_id, context, range, self);

    /* A bare value is shorthand for {:value => value}. */
    if (!RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cHash))) {
        rb_options = rb_hash_new();
        rb_hash_aset(rb_options, RB_GRN_INTERN("value"), rb_value);
    }

    rb_grn_scan_options(rb_options,
                        "section", &rb_section,
                        "old_value", &rb_old_value,
                        "value", &rb_new_value,
                        NULL);

    section = NIL_P(rb_section) ? 1 : NUM2UINT(rb_section);

    /* A nil value is passed to groonga as NULL instead of a bulk. */
    if (!NIL_P(rb_old_value)) {
        GRN_BULK_REWIND(old_value);
        RVAL2GRNBULK(rb_old_value, context, old_value);
    } else {
        old_value = NULL;
    }

    if (!NIL_P(rb_new_value)) {
        GRN_BULK_REWIND(new_value);
        RVAL2GRNBULK(rb_new_value, context, new_value);
    } else {
        new_value = NULL;
    }

    rc = grn_column_index_update(context, column,
                                 id, section, old_value, new_value);
    rb_grn_context_check(context, self);
    rb_grn_rc_check(rc, self);

    /* Return what the caller passed, not the wrapping Hash. */
    return rb_value;
}
/*
 * Updates a record that has @new_value@ as new content and
 * @old_value@ as old content in inverted index. Normally, this method
 * is not used explicitly. Inverted index for fulltext search is
 * updated automatically by using @:source@ option of
 * {Groonga::Table#define_index_column}.
 *
 * @example Updates sentences of an article in index
 *   articles = Groonga::Array.create(:name => "Articles")
 *   articles.define_column("title", "ShortText")
 *   articles.define_column("content", "Text")
 *
 *   terms = Groonga::Hash.create(:name => "Terms",
 *                                :key_type => "ShortText",
 *                                :default_tokenizer => "TokenBigram")
 *   content_index = terms.define_index_column("content", articles,
 *                                             :with_position => true,
 *                                             :with_section => true)
 *
 *   old_sentence = <<-SENTENCE
 *   Groonga is a fast and accurate full text search engine based on
 *   inverted index. One of the characteristics of groonga is that a
 *   newly registered document instantly appears in search
 *   results. Also, groonga allows updates without read locks. These
 *   characteristics result in superior performance on real-time
 *   applications.
 *   SENTENCE
 *
 *   new_sentence = <<-SENTENCE
 *   Groonga is also a column-oriented database management system
 *   (DBMS). Compared with well-known row-oriented systems, such as
 *   MySQL and PostgreSQL, column-oriented systems are more suited for
 *   aggregate queries. Due to this advantage, groonga can cover
 *   weakness of row-oriented systems.
 *   SENTENCE
 *
 *   groonga = articles.add(:title => "groonga", :content => old_sentence)
 *
 *   content_index.add(groonga, old_sentence, :section => 1)
 *   p content_index.search("engine").size # -> 1
 *   p content_index.search("MySQL").size  # -> 0
 *
 *   groonga[:content] = new_sentence
 *   content_index.update(groonga, old_sentence, new_sentence, :section => 1)
 *   p content_index.search("engine").size # -> 0
 *   p content_index.search("MySQL").size  # -> 1
 *
 * @overload update(record, old_value, new_value, options={})
 *   @param [Groonga::Record, Integer] record
 *     The record that has a @new_value@ as its new value and
 *     @old_value@ as its old value. It can be Integer as record id.
 *   @param [String] old_value
 *     The old value of the @record@.
 *   @param [String] new_value
 *     The new value of the @record@.
 *   @param [::Hash] options
 *     The options.
 *   @option options [Integer] :section (1)
 *     The section number. It is one-origin.
 *
 *     You must specify @{:with_section => true}@ in
 *     {Groonga::Table#define_index_column} to use this option.
 *   @return [void]
 *
 * @since 3.0.2
 */
static VALUE
rb_grn_index_column_update (int argc, VALUE *argv, VALUE self)
{
    grn_ctx *context = NULL;
    grn_obj *column, *range;
    grn_obj *old_value, *new_value;
    grn_id id;
    grn_rc rc;
    unsigned int section;
    VALUE rb_record, rb_old_value, rb_new_value;
    VALUE rb_options, rb_section;

    /* Three required arguments followed by one optional Hash. */
    rb_scan_args(argc, argv, "31",
                 &rb_record, &rb_old_value, &rb_new_value, &rb_options);

    rb_grn_index_column_deconstruct(SELF(self), &column, &context,
                                    NULL, NULL,
                                    &new_value, &old_value,
                                    NULL, &range,
                                    NULL, NULL);

    id = RVAL2GRNID(rb_record, context, range, self);

    /* A nil value is passed to groonga as NULL instead of a bulk. */
    if (!NIL_P(rb_old_value)) {
        GRN_BULK_REWIND(old_value);
        RVAL2GRNBULK(rb_old_value, context, old_value);
    } else {
        old_value = NULL;
    }

    if (!NIL_P(rb_new_value)) {
        GRN_BULK_REWIND(new_value);
        RVAL2GRNBULK(rb_new_value, context, new_value);
    } else {
        new_value = NULL;
    }

    rb_grn_scan_options(rb_options,
                        "section", &rb_section,
                        NULL);
    section = NIL_P(rb_section) ? 1 : NUM2UINT(rb_section);

    rc = grn_column_index_update(context, column,
                                 id, section, old_value, new_value);
    rb_grn_context_check(context, self);
    rb_grn_rc_check(rc, self);

    return self;
}
/*
 * It updates a value of variable size column value for the record
 * that ID is _id_.
 *
 * Weight vector column is a special variable size column. This
 * description describes only weight vector column. Other variable
 * size columns work as you would expect.
 *
 * @example Use weight vector as matrix search result weight
 *   Groonga::Schema.define do |schema|
 *     schema.create_table("Products",
 *                         :type => :patricia_trie,
 *                         :key_type => "ShortText") do |table|
 *       # This is weight vector.
 *       # ":with_weight => true" is important for matrix search result weight.
 *       table.short_text("tags",
 *                        :type => :vector,
 *                        :with_weight => true)
 *     end
 *
 *     schema.create_table("Tags",
 *                         :type => :hash,
 *                         :key_type => "ShortText") do |table|
 *       # This is inverted index. It also needs ":with_weight => true".
 *       table.index("Products.tags", :with_weight => true)
 *     end
 *   end
 *
 *   products = Groonga["Products"]
 *   groonga = products.add("Groonga")
 *   groonga.tags = [
 *     {
 *       :value  => "groonga",
 *       :weight => 100,
 *     },
 *   ]
 *   rroonga = products.add("Rroonga")
 *   rroonga.tags = [
 *     {
 *       :value  => "ruby",
 *       :weight => 100,
 *     },
 *     {
 *       :value  => "groonga",
 *       :weight => 10,
 *     },
 *   ]
 *
 *   result = products.select do |record|
 *     # Search by "groonga"
 *     record.match("groonga") do |match_target|
 *       match_target.tags
 *     end
 *   end
 *
 *   result.each do |record|
 *     p [record.key.key, record.score]
 *   end
 *   # Matches all records with weight.
 *   # => ["Groonga", 101]
 *   #    ["Rroonga", 11]
 *
 *   # Increases score for "ruby" 10 times
 *   products.select(# The previous search result. Required.
 *                   :result => result,
 *                   # It just adds score to existing records in the result. Required.
 *                   :operator => Groonga::Operator::ADJUST) do |record|
 *     record.match("ruby") do |target|
 *       target.tags * 10 # 10 times
 *     end
 *   end
 *
 *   result.each do |record|
 *     p [record.key.key, record.score]
 *   end
 *   # Weight is used for increasing score.
 *   # => ["Groonga", 101]  <- Not changed.
 *   #    ["Rroonga", 1021] <- 1021 (= 101 * 10 + 1) increased.
 *
 * @overload []=(id, elements)
 *   This description is for weight vector column.
 *
 *   @param [Integer, Record] id The record ID.
 *   @param [Array<Hash<Symbol, String>>] elements An array of values
 *     for weight vector.
 *     Each value is a Hash like the following form:
 *
 *     <pre>
 *     {
 *       :value  => [KEY],
 *       :weight => [WEIGHT],
 *     }
 *     </pre>
 *
 *     @[KEY]@ must be the same type of the key of the table that is
 *     specified as range on creating the weight vector.
 *
 *     @[WEIGHT]@ must be a positive integer. It is treated as 0 when
 *     omitted. Note that the search score becomes @weight + 1@. It
 *     means that if you want to get 10 as score, you should set 9 as
 *     weight.
 *
 *     A Hash whose keys are vector values and whose values are
 *     weights is also accepted instead of the Array.
 *
 * @overload []=(id, value)
 *   This description is for variable size columns except weight
 *   vector column.
 *
 *   @param [Integer, Record] id The record ID.
 *   @param [::Object] value A new value.
 *   @see Groonga::Object#[]=
 *
 * @since 4.0.1
 */
static VALUE
rb_grn_variable_size_column_array_set (VALUE self,
                                       VALUE rb_id,
                                       VALUE rb_value)
{
    grn_ctx *context = NULL;
    grn_obj *column, *range;
    grn_rc rc;
    grn_id id;
    grn_obj *value, *element_value;
    int flags = GRN_OBJ_SET;

    rb_grn_variable_size_column_deconstruct(SELF(self), &column, &context,
                                            NULL, NULL, &value, &element_value,
                                            NULL, &range);

    /* Columns without weight are handled by the superclass. */
    if (!(column->header.flags & GRN_OBJ_WITH_WEIGHT)) {
        VALUE args[2];
        args[0] = rb_id;
        args[1] = rb_value;
        return rb_call_super(2, args);
    }

    id = RVAL2GRNID(rb_id, context, range, self);

    grn_obj_reinit(context, value,
                   value->header.domain,
                   value->header.flags | GRN_OBJ_VECTOR);
    value->header.flags |= GRN_OBJ_WITH_WEIGHT;

    if (RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cArray))) {
        /* RARRAY_LEN() yields a long; keeping it in an int would
         * truncate the length of very large arrays. */
        long i, n;

        n = RARRAY_LEN(rb_value);
        for (i = 0; i < n; i++) {
            unsigned int weight = 0;
            VALUE rb_element_value, rb_weight;

            rb_grn_scan_options(RARRAY_PTR(rb_value)[i],
                                "value", &rb_element_value,
                                "weight", &rb_weight,
                                NULL);

            if (!NIL_P(rb_weight)) {
                weight = NUM2UINT(rb_weight);
            }

            if (value->header.type == GRN_UVECTOR) {
                /* Named differently from the record id above to avoid
                 * shadowing it. */
                grn_id element_id = RVAL2GRNID(rb_element_value,
                                               context, range, self);
                grn_uvector_add_element(context, value, element_id, weight);
            } else {
                GRN_BULK_REWIND(element_value);
                /* A nil element value is stored as an empty element. */
                if (!NIL_P(rb_element_value)) {
                    RVAL2GRNBULK(rb_element_value, context, element_value);
                }

                grn_vector_add_element(context, value,
                                       GRN_BULK_HEAD(element_value),
                                       GRN_BULK_VSIZE(element_value),
                                       weight,
                                       element_value->header.domain);
            }
        }
    } else if (RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cHash))) {
        HashElementToVectorElementData data;

        data.self = self;
        data.context = context;
        data.vector = value;
        data.element_value = element_value;
        data.range = range;
        rb_hash_foreach(rb_value, hash_element_to_vector_element, (VALUE)&data);
    } else {
        rb_raise(rb_eArgError,
                 "<%s>: "
                 "weight vector value must be an array of index value or "
                 "a hash that key is vector value and value is vector weight: "
                 "<%s>",
                 rb_grn_inspect(self),
                 rb_grn_inspect(rb_value));
    }

    rc = grn_obj_set_value(context, column, id, value, flags);
    rb_grn_context_check(context, self);
    rb_grn_rc_check(rc, self);

    return rb_value;
}
/*
 * It gets a value of variable size column value for the record that
 * ID is _id_.
 *
 * @example Gets weight vector value
 *   Groonga::Schema.define do |schema|
 *     schema.create_table("Products",
 *                         :type => :patricia_trie,
 *                         :key_type => "ShortText") do |table|
 *       # This is weight vector.
 *       # ":with_weight => true" is important to store weight value.
 *       table.short_text("tags",
 *                        :type => :vector,
 *                        :with_weight => true)
 *     end
 *   end
 *
 *   products = Groonga["Products"]
 *   rroonga = products.add("Rroonga")
 *   rroonga.tags = [
 *     {
 *       :value  => "ruby",
 *       :weight => 100,
 *     },
 *     {
 *       :value  => "groonga",
 *       :weight => 10,
 *     },
 *   ]
 *
 *   p rroonga.tags
 *   # => [
 *   #      {:value => "ruby",    :weight => 100},
 *   #      {:value => "groonga", :weight => 10}
 *   #    ]
 *
 * @overload [](id)
 *   @param [Integer, Record] id The record ID.
 *   @return [Array<Hash<Symbol, String>>] An array of value if the column
 *     is a weight vector column.
 *     Each value is a Hash like the following form:
 *
 *     <pre>
 *     {
 *       :value  => [KEY],
 *       :weight => [WEIGHT],
 *     }
 *     </pre>
 *
 *     @[KEY]@ is the key of the table that is specified as range on
 *     creating the weight vector.
 *
 *     @[WEIGHT]@ is a positive integer.
 *
 *   @return [::Object] See {Groonga::Object#[]} for columns except
 *     weight vector column.
 *
 * @since 4.0.1.
 */
static VALUE
rb_grn_variable_size_column_array_reference (VALUE self, VALUE rb_id)
{
    grn_ctx *context = NULL;
    grn_obj *column, *range;
    grn_obj *value;
    grn_id id;
    VALUE rb_elements;
    VALUE rb_range;
    unsigned int index, n_elements;

    rb_grn_variable_size_column_deconstruct(SELF(self), &column, &context,
                                            NULL, NULL, &value, NULL,
                                            NULL, &range);

    /* Columns without weight are handled by the superclass. */
    if (!(column->header.flags & GRN_OBJ_WITH_WEIGHT)) {
        return rb_call_super(1, &rb_id);
    }

    id = RVAL2GRNID(rb_id, context, range, self);

    grn_obj_reinit(context, value,
                   value->header.domain,
                   value->header.flags | GRN_OBJ_VECTOR);
    grn_obj_get_value(context, column, id, value);
    rb_grn_context_check(context, self);

    rb_range = GRNTABLE2RVAL(context, range, GRN_FALSE);

    n_elements = grn_vector_size(context, value);
    rb_elements = rb_ary_new2(n_elements);
    for (index = 0; index < n_elements; index++) {
        VALUE rb_element;
        VALUE rb_element_value;
        unsigned int weight = 0;
        grn_id domain;

        if (value->header.type == GRN_UVECTOR) {
            /* Reference vector: each element is a record of the range. */
            grn_id element_id;
            element_id = grn_uvector_get_element(context, value, index,
                                                 &weight);
            rb_element_value = rb_grn_record_new(rb_range, element_id, Qnil);
        } else {
            /* Otherwise the raw element bytes are returned as a String. */
            const char *raw_element;
            unsigned int raw_element_length;
            raw_element_length = grn_vector_get_element(context, value, index,
                                                        &raw_element,
                                                        &weight,
                                                        &domain);
            rb_element_value = rb_str_new(raw_element, raw_element_length);
        }

        rb_element = rb_hash_new();
        rb_hash_aset(rb_element,
                     ID2SYM(rb_intern("value")),
                     rb_element_value);
        rb_hash_aset(rb_element,
                     ID2SYM(rb_intern("weight")),
                     UINT2NUM(weight));

        rb_ary_push(rb_elements, rb_element);
    }

    return rb_elements;
}