Пример #1
0
/*
 * Converts the Ruby object `rb_key` into a groonga key stored in `key`.
 *
 * The conversion depends on `domain`:
 *
 *   * NULL: `rb_key` is converted with RVAL2GRNBULK().
 *   * GRN_TYPE: `rb_key` is converted with RVAL2GRNBULK_WITH_TYPE()
 *     using `domain_id` and `domain`.
 *   * A table (hash, patricia trie, double array or no-key):
 *     `rb_key` is resolved to a record ID with RVAL2GRNID() and the
 *     raw bytes of that ID are written into `key`.
 *   * Anything else: `rb_key` must be an Integer. It is converted
 *     with NUM2UINT() and the raw bytes of the result are written
 *     into `key`.
 *
 * `related_object` is passed to RVAL2GRNID() and is inspected for the
 * error message.
 *
 * Raises rb_eGrnError when the domain is of an unhandled type and
 * `rb_key` is not an Integer.
 *
 * Returns the result of the bulk conversion macro for the first two
 * cases and `key` for the ID based cases.
 */
grn_obj *
rb_grn_key_from_ruby_object (VALUE rb_key, grn_ctx *context,
                             grn_obj *key, grn_id domain_id, grn_obj *domain,
                             VALUE related_object)
{
    grn_id record_id;

    /* Without a domain there is no type information to apply. */
    if (domain == NULL) {
        return RVAL2GRNBULK(rb_key, context, key);
    }

    switch (domain->header.type) {
    case GRN_TYPE:
        return RVAL2GRNBULK_WITH_TYPE(rb_key, context, key, domain_id, domain);
    case GRN_TABLE_HASH_KEY:
    case GRN_TABLE_PAT_KEY:
    case GRN_TABLE_DAT_KEY:
    case GRN_TABLE_NO_KEY:
        record_id = RVAL2GRNID(rb_key, context, domain, related_object);
        break;
    default:
        if (!RVAL2CBOOL(rb_obj_is_kind_of(rb_key, rb_cInteger))) {
            rb_raise(rb_eGrnError,
                     "should be unsigned integer: <%s>: <%s>",
                     rb_grn_inspect(rb_key),
                     rb_grn_inspect(related_object));
        }
        record_id = NUM2UINT(rb_key);
        break;
    }

    /* The key holds the raw bytes of the ID. */
    GRN_TEXT_SET(context, key, &record_id, sizeof(record_id));
    return key;
}
Пример #2
0
/*
 * Hash iteration callback that appends one hash entry to a weight
 * vector.
 *
 * `key` is the element value and `value` is its weight, converted
 * with NUM2UINT(). `user_data` is a pointer to a
 * HashElementToVectorElementData cast to VALUE.
 *
 * For a GRN_UVECTOR target, `key` is resolved to a record ID with
 * RVAL2GRNID() and added with grn_uvector_add_element(). Otherwise
 * `key` is converted into the shared element bulk and added with
 * grn_vector_add_element().
 *
 * Always returns ST_CONTINUE.
 */
static int
hash_element_to_vector_element(VALUE key, VALUE value, VALUE user_data)
{
    HashElementToVectorElementData *data;
    grn_ctx *context;
    grn_obj *vector;
    grn_obj *element_bulk;
    unsigned int weight;

    data = (HashElementToVectorElementData *)user_data;
    weight = NUM2UINT(value);

    context = data->context;
    vector = data->vector;

    if (vector->header.type == GRN_UVECTOR) {
        grn_id element_id;

        element_id = RVAL2GRNID(key, context, data->range, data->self);
        grn_uvector_add_element(context, vector, element_id, weight);
        return ST_CONTINUE;
    }

    /* The element bulk is shared between entries, so clear it first. */
    element_bulk = data->element_value;
    GRN_BULK_REWIND(element_bulk);
    RVAL2GRNBULK(key, context, element_bulk);

    grn_vector_add_element(context, vector,
                           GRN_BULK_HEAD(element_bulk),
                           GRN_BULK_VSIZE(element_bulk),
                           weight,
                           element_bulk->header.domain);

    return ST_CONTINUE;
}
Пример #3
0
/*
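 * Creates an inverted index so that the record whose ID is _id_ can
 * be found quickly by full-text search. In most cases the index for
 * full-text search is updated automatically by specifying the
 * +:source+ option of {Groonga::Table#define_index_column}, so this
 * method is rarely called explicitly.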
 *
 * @example Creates an index for each paragraph of an article.
 *   articles = Groonga::Array.create(:name => "<articles>")
 *   articles.define_column("title", "ShortText")
 *   articles.define_column("content", "Text")
 *
 *   terms = Groonga::Hash.create(:name => "<terms>",
 *                                :default_tokenizer => "TokenBigram")
 *   content_index = terms.define_index_column("content", articles,
 *                                             :with_section => true)
 *
 *   content = <<-EOC
 *   groonga は組み込み型の全文検索エンジンライブラリです。
 *   DBMSやスクリプト言語処理系等に組み込むことによって、その
 *   全文検索機能を強化することができます。また、リレーショナ
 *   ルモデルに基づくデータストア機能を内包しており、groonga
 *   単体でも高速なデータストアサーバとして使用することができ
 *   ます。
 *
 *   ■全文検索方式
 *   転置索引型の全文検索エンジンです。転置索引は圧縮されてファ
 *   イルに格納され、検索時のディスク読み出し量を小さく、かつ
 *   局所的に抑えるように設計されています。用途に応じて以下の
 *   索引タイプを選択できます。
 *   EOC
 *
 *   groonga = articles.add(:title => "groonga", :content => content)
 *
 *   content.split(/\n{2,}/).each_with_index do |sentence, i|
 *     content_index[groonga] = {:value => sentence, :section => i + 1}
 *   end
 *
 *   content_index.search("エンジン").collect do |record|
 *     p record.key["title"] # -> "groonga"
 *   end
 *
 * @overload []=(id, value)
 *   @param [String] value The new value.
 * @overload []=(id, options)
 *   Specifying _options_ gives finer control over index creation
 *   than specifying _value_ alone.
 *   @param [::Hash] options The name and value
 *     pairs. Omitted names are initialized as the default value
 *   @option options :section
 *     Specifies the section number. If omitted, 1 is assumed.
 *     @{:with_section => true}@ must have been specified in
 *     {Groonga::Table#define_index_column}.
 *   @option options :old_value
 *     Specifies the previous value. If omitted, the current value
 *     is used. Normally there is no need to specify it.
 *   @option options :value
 *     Specifies the new value. Passing _value_ and passing
 *     @{:value => value}@ in _options_ behave the same.
 *
 * @deprecated Since 3.0.2. Use {#add}, {#delete} or {#update} instead.
 */
static VALUE
rb_grn_index_column_array_set (VALUE self, VALUE rb_id, VALUE rb_value)
{
    grn_ctx *context = NULL;
    grn_obj *column, *range;
    grn_obj *old_bulk, *new_bulk;
    grn_id record_id;
    grn_rc rc;
    unsigned int section = 1; /* used when :section is omitted */
    VALUE rb_options = rb_value;
    VALUE rb_section, rb_old_value, rb_new_value;

    rb_grn_index_column_deconstruct(SELF(self), &column, &context,
                                    NULL, NULL,
                                    &new_bulk, &old_bulk,
                                    NULL, &range,
                                    NULL, NULL);

    record_id = RVAL2GRNID(rb_id, context, range, self);

    /* A bare value is shorthand for {:value => value}. */
    if (!RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cHash))) {
        rb_options = rb_hash_new();
        rb_hash_aset(rb_options, RB_GRN_INTERN("value"), rb_value);
    }

    rb_grn_scan_options(rb_options,
                        "section", &rb_section,
                        "old_value", &rb_old_value,
                        "value", &rb_new_value,
                        NULL);

    if (!NIL_P(rb_section)) {
        section = NUM2UINT(rb_section);
    }

    /* nil is handed to groonga as NULL; anything else is converted
     * into the bulk obtained from the column. */
    if (!NIL_P(rb_old_value)) {
        GRN_BULK_REWIND(old_bulk);
        RVAL2GRNBULK(rb_old_value, context, old_bulk);
    } else {
        old_bulk = NULL;
    }

    if (!NIL_P(rb_new_value)) {
        GRN_BULK_REWIND(new_bulk);
        RVAL2GRNBULK(rb_new_value, context, new_bulk);
    } else {
        new_bulk = NULL;
    }

    rc = grn_column_index_update(context, column,
                                 record_id, section, old_bulk, new_bulk);
    rb_grn_context_check(context, self);
    rb_grn_rc_check(rc, self);

    /* Return what the caller passed in, not the wrapping hash. */
    return rb_value;
}
Пример #4
0
/*
 * Updates a record that has @new_value@ as new content and
 * @old_value@ as old content in inverted index. Normally, this method
 * is not used explicitly. Inverted index for fulltext search is
 * updated automatically by using @:source@ option of
 * {Groonga::Table#define_index_column}.
 *
 * @example Updates sentences of an article in index
 *   articles = Groonga::Array.create(:name => "Articles")
 *   articles.define_column("title", "ShortText")
 *   articles.define_column("content", "Text")
 *
 *   terms = Groonga::Hash.create(:name => "Terms",
 *                                :key_type => "ShortText",
 *                                :default_tokenizer => "TokenBigram")
 *   content_index = terms.define_index_column("content", articles,
 *                                             :with_position => true,
 *                                             :with_section => true)
 *
 *   old_sentence = <<-SENTENCE
 *   Groonga is a fast and accurate full text search engine based on
 *   inverted index. One of the characteristics of groonga is that a
 *   newly registered document instantly appears in search
 *   results. Also, groonga allows updates without read locks. These
 *   characteristics result in superior performance on real-time
 *   applications.
 *   SENTENCE
 *
 *   new_sentence = <<-SENTENCE
 *   Groonga is also a column-oriented database management system
 *   (DBMS). Compared with well-known row-oriented systems, such as
 *   MySQL and PostgreSQL, column-oriented systems are more suited for
 *   aggregate queries. Due to this advantage, groonga can cover
 *   weakness of row-oriented systems.
 *   SENTENCE
 *
 *   groonga = articles.add(:title => "groonga", :content => old_sentence)
 *
 *   content_index.add(groonga, old_sentence, :section => 1)
 *   p content_index.search("engine").size # -> 1
 *   p content_index.search("MySQL").size  # -> 0
 *
 *   groonga[:content] = new_sentence
 *   content_index.update(groonga, old_sentence, new_sentence, :section => 1)
 *   p content_index.search("engine").size # -> 0
 *   p content_index.search("MySQL").size  # -> 1
 *
 * @overload update(record, old_value, new_value, options={})
 *   @param [Groonga::Record, Integer] record
 *     The record that has a @new_value@ as its new value and
 *     @old_value@ as its old value. It can be Integer as record id.
 *   @param [String] old_value
 *     The old value of the @record@.
 *   @param [String] new_value
 *     The new value of the @record@.
 *   @param [::Hash] options
 *     The options.
 *   @option options [Integer] :section (1)
 *     The section number. It is one-origin.
 *
 *     You must specify @{:with_section => true}@ in
 *     {Groonga::Table#define_index_column} to use this option.
 *   @return [void]
 *
 * @since 3.0.2
 */
static VALUE
rb_grn_index_column_update (int argc, VALUE *argv, VALUE self)
{
    grn_ctx *context = NULL;
    grn_obj *column, *range;
    grn_obj *old_bulk, *new_bulk;
    grn_id record_id;
    grn_rc rc;
    unsigned int section = 1; /* used when :section is omitted */
    VALUE rb_record, rb_old_value, rb_new_value, rb_options, rb_section;

    /* Three required arguments followed by one optional argument. */
    rb_scan_args(argc, argv, "31",
                 &rb_record, &rb_old_value, &rb_new_value, &rb_options);

    rb_grn_index_column_deconstruct(SELF(self), &column, &context,
                                    NULL, NULL,
                                    &new_bulk, &old_bulk,
                                    NULL, &range,
                                    NULL, NULL);

    record_id = RVAL2GRNID(rb_record, context, range, self);

    /* nil is handed to groonga as NULL; anything else is converted
     * into the bulk obtained from the column. */
    if (!NIL_P(rb_old_value)) {
        GRN_BULK_REWIND(old_bulk);
        RVAL2GRNBULK(rb_old_value, context, old_bulk);
    } else {
        old_bulk = NULL;
    }

    if (!NIL_P(rb_new_value)) {
        GRN_BULK_REWIND(new_bulk);
        RVAL2GRNBULK(rb_new_value, context, new_bulk);
    } else {
        new_bulk = NULL;
    }

    rb_grn_scan_options(rb_options,
                        "section", &rb_section,
                        NULL);

    if (!NIL_P(rb_section)) {
        section = NUM2UINT(rb_section);
    }

    rc = grn_column_index_update(context, column, record_id, section,
                                 old_bulk, new_bulk);
    rb_grn_context_check(context, self);
    rb_grn_rc_check(rc, self);

    return self;
}
Пример #5
0
/*
 * It updates the value of a variable size column for the record
 * whose ID is _id_.
 *
 * A weight vector column is a special variable size column. This
 * description covers only weight vector columns. Other variable
 * size columns work as you would expect.
 *
 * @example Use weight vector as matrix search result weight
 *    Groonga::Schema.define do |schema|
 *      schema.create_table("Products",
 *                          :type => :patricia_trie,
 *                          :key_type => "ShortText") do |table|
 *        # This is weight vector.
 *        # ":with_weight => true" is important for matrix search result weight.
 *        table.short_text("tags",
 *                         :type => :vector,
 *                         :with_weight => true)
 *      end
 *
 *      schema.create_table("Tags",
 *                          :type => :hash,
 *                          :key_type => "ShortText") do |table|
 *        # This is inverted index. It also needs ":with_weight => true".
 *        table.index("Products.tags", :with_weight => true)
 *      end
 *    end
 *
 *    products = Groonga["Products"]
 *    groonga = products.add("Groonga")
 *    groonga.tags = [
 *      {
 *        :value  => "groonga",
 *        :weight => 100,
 *      },
 *    ]
 *    rroonga = products.add("Rroonga")
 *    rroonga.tags = [
 *      {
 *        :value  => "ruby",
 *        :weight => 100,
 *      },
 *      {
 *        :value  => "groonga",
 *        :weight => 10,
 *      },
 *    ]
 *
 *    result = products.select do |record|
 *      # Search by "groonga"
 *      record.match("groonga") do |match_target|
 *        match_target.tags
 *      end
 *    end
 *
 *    result.each do |record|
 *      p [record.key.key, record.score]
 *    end
 *    # Matches all records with weight.
 *    # => ["Groonga", 101]
 *    #    ["Rroonga", 11]
 *
 *    # Increases score for "ruby" 10 times
 *    products.select(# The previous search result. Required.
 *                    :result => result,
 *                    # It just adds score to existing records in the result. Required.
 *                    :operator => Groonga::Operator::ADJUST) do |record|
 *      record.match("ruby") do |target|
 *        target.tags * 10 # 10 times
 *      end
 *    end
 *
 *    result.each do |record|
 *      p [record.key.key, record.score]
 *    end
 *    # Weight is used for increasing score.
 *    # => ["Groonga", 101]  <- Not changed.
 *    #    ["Rroonga", 1021] <- Increased to 1021 (= 11 + 101 * 10).
 *
 * @overload []=(id, elements)
 *   This description is for weight vector column.
 *
 *   @param [Integer, Record] id The record ID.
 *   @param [Array<Hash<Symbol, String>>] elements An array of values
 *     for weight vector.
 *     Each value is a Hash like the following form:
 *
 *     <pre>
 *     {
 *       :value  => [KEY],
 *       :weight => [WEIGHT],
 *     }
 *     </pre>
 *
 *     @[KEY]@ must be the same type of the key of the table that is
 *     specified as range on creating the weight vector.
 *
 *     @[WEIGHT]@ must be a positive integer. Note that the search
 *     score becomes @weight + 1@. It means that if you want to get 10
 *     as the score, you should set 9 as the weight.
 *
 * @overload []=(id, value)
 *   This description is for variable size columns except weight
 *   vector column.
 *
 *   @param [Integer, Record] id The record ID.
 *   @param [::Object] value A new value.
 *   @see Groonga::Object#[]=
 *
 * @since 4.0.1
 */
static VALUE
rb_grn_variable_size_column_array_set (VALUE self, VALUE rb_id, VALUE rb_value)
{
    grn_ctx *context = NULL;
    grn_obj *column, *range;
    grn_rc rc;
    grn_id id;
    grn_obj *value, *element_value;
    int flags = GRN_OBJ_SET;

    rb_grn_variable_size_column_deconstruct(SELF(self), &column, &context,
                                            NULL, NULL, &value, &element_value,
                                            NULL, &range);

    /* Columns without weight are handled by the superclass
     * implementation. */
    if (!(column->header.flags & GRN_OBJ_WITH_WEIGHT)) {
        VALUE args[2];
        args[0] = rb_id;
        args[1] = rb_value;
        return rb_call_super(2, args);
    }

    id = RVAL2GRNID(rb_id, context, range, self);

    /* Reset the value buffer as a vector that carries weights. */
    grn_obj_reinit(context, value,
                   value->header.domain,
                   value->header.flags | GRN_OBJ_VECTOR);
    value->header.flags |= GRN_OBJ_WITH_WEIGHT;
    if (RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cArray))) {
        /* RARRAY_LEN() yields a long; using int here would truncate
         * the length of very large arrays on LP64 platforms. */
        long i, n;
        n = RARRAY_LEN(rb_value);
        for (i = 0; i < n; i++) {
            unsigned int weight = 0;
            VALUE rb_element_value, rb_weight;

            /* Each element is a {:value => ..., :weight => ...} hash. */
            rb_grn_scan_options(RARRAY_PTR(rb_value)[i],
                                "value", &rb_element_value,
                                "weight", &rb_weight,
                                NULL);

            /* An omitted weight is stored as 0. */
            if (!NIL_P(rb_weight)) {
                weight = NUM2UINT(rb_weight);
            }

            if (value->header.type == GRN_UVECTOR) {
                /* Named distinctly so that it does not shadow the
                 * record ID used for grn_obj_set_value() below. */
                grn_id element_id;
                element_id = RVAL2GRNID(rb_element_value, context,
                                        range, self);
                grn_uvector_add_element(context, value, element_id, weight);
            } else {
                /* The element bulk is shared between elements, so
                 * clear it first. A nil value adds an empty element. */
                GRN_BULK_REWIND(element_value);
                if (!NIL_P(rb_element_value)) {
                    RVAL2GRNBULK(rb_element_value, context, element_value);
                }

                grn_vector_add_element(context, value,
                                       GRN_BULK_HEAD(element_value),
                                       GRN_BULK_VSIZE(element_value),
                                       weight,
                                       element_value->header.domain);
            }
        }
    } else if (RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cHash))) {
        /* Hash form: each key is an element value and each value is
         * its weight. */
        HashElementToVectorElementData data;
        data.self = self;
        data.context = context;
        data.vector = value;
        data.element_value = element_value;
        data.range = range;
        rb_hash_foreach(rb_value, hash_element_to_vector_element, (VALUE)&data);
    } else {
        rb_raise(rb_eArgError,
                 "<%s>: "
                 "weight vector value must be an array of index value or "
                 "a hash that key is vector value and value is vector weight: "
                 "<%s>",
                 rb_grn_inspect(self),
                 rb_grn_inspect(rb_value));
    }

    rc = grn_obj_set_value(context, column, id, value, flags);
    rb_grn_context_check(context, self);
    rb_grn_rc_check(rc, self);

    return rb_value;
}
Пример #6
0
/*
 * It gets the value of a variable size column for the record whose
 * ID is _id_.
 *
 * @example Gets weight vector value
 *    Groonga::Schema.define do |schema|
 *      schema.create_table("Products",
 *                          :type => :patricia_trie,
 *                          :key_type => "ShortText") do |table|
 *        # This is weight vector.
 *        # ":with_weight => true" is important to store weight value.
 *        table.short_text("tags",
 *                         :type => :vector,
 *                         :with_weight => true)
 *      end
 *    end
 *
 *    products = Groonga["Products"]
 *    rroonga = products.add("Rroonga")
 *    rroonga.tags = [
 *      {
 *        :value  => "ruby",
 *        :weight => 100,
 *      },
 *      {
 *        :value  => "groonga",
 *        :weight => 10,
 *      },
 *    ]
 *
 *    p rroonga.tags
 *    # => [
 *    #      {:value => "ruby",    :weight => 100},
 *    #      {:value => "groonga", :weight => 10}
 *    #    ]
 *
 * @overload [](id)
 *   @param [Integer, Record] id The record ID.
 *   @return [Array<Hash<Symbol, String>>] An array of value if the column
 *     is a weight vector column.
 *     Each value is a Hash like the following form:
 *
 *     <pre>
 *     {
 *       :value  => [KEY],
 *       :weight => [WEIGHT],
 *     }
 *     </pre>
 *
 *     @[KEY]@ is the key of the table that is specified as range on
 *     creating the weight vector.
 *
 *     @[WEIGHT]@ is a positive integer.
 *
 *   @return [::Object] See {Groonga::Object#[]} for columns except
 *     weight vector column.
 *
 * @since 4.0.1.
 */
static VALUE
rb_grn_variable_size_column_array_reference (VALUE self, VALUE rb_id)
{
    grn_ctx *context = NULL;
    grn_obj *column, *range;
    grn_obj *vector;
    grn_id record_id;
    VALUE rb_elements;
    VALUE rb_range;
    unsigned int index, n_elements;

    rb_grn_variable_size_column_deconstruct(SELF(self), &column, &context,
                                            NULL, NULL, &vector, NULL,
                                            NULL, &range);

    /* Columns without weight are handled by the superclass
     * implementation. */
    if ((column->header.flags & GRN_OBJ_WITH_WEIGHT) == 0) {
        return rb_call_super(1, &rb_id);
    }

    record_id = RVAL2GRNID(rb_id, context, range, self);

    /* Reset the value buffer as a vector before reading into it. */
    grn_obj_reinit(context, vector,
                   vector->header.domain,
                   vector->header.flags | GRN_OBJ_VECTOR);
    grn_obj_get_value(context, column, record_id, vector);
    rb_grn_context_check(context, self);

    rb_range = GRNTABLE2RVAL(context, range, GRN_FALSE);

    n_elements = grn_vector_size(context, vector);
    rb_elements = rb_ary_new2(n_elements);
    for (index = 0; index < n_elements; index++) {
        VALUE rb_element;
        VALUE rb_element_value;
        unsigned int weight = 0;
        grn_id element_domain;

        if (vector->header.type == GRN_UVECTOR) {
            /* Elements are record IDs of the range table; wrap each
             * one in a record object. */
            grn_id element_id;

            element_id = grn_uvector_get_element(context, vector,
                                                 index, &weight);
            rb_element_value = rb_grn_record_new(rb_range, element_id, Qnil);
        } else {
            /* Elements are raw byte sequences; return them as Strings. */
            const char *content;
            unsigned int content_length;

            content_length = grn_vector_get_element(context,
                                                    vector,
                                                    index,
                                                    &content,
                                                    &weight,
                                                    &element_domain);
            rb_element_value = rb_str_new(content, content_length);
        }

        /* Build {:value => ..., :weight => ...} for this element. */
        rb_element = rb_hash_new();
        rb_hash_aset(rb_element,
                     ID2SYM(rb_intern("value")),
                     rb_element_value);
        rb_hash_aset(rb_element,
                     ID2SYM(rb_intern("weight")),
                     UINT2NUM(weight));

        rb_ary_push(rb_elements, rb_element);
    }

    return rb_elements;
}