Ejemplo n.º 1
0
static int
hash_element_to_vector_element(VALUE key, VALUE value, VALUE user_data)
{
    HashElementToVectorElementData *data =
        (HashElementToVectorElementData *)user_data;
    unsigned int weight;

    weight = NUM2UINT(value);

    if (data->vector->header.type == GRN_UVECTOR) {
        grn_id id = RVAL2GRNID(key, data->context, data->range, data->self);
        grn_uvector_add_element(data->context, data->vector, id, weight);
    } else {
        GRN_BULK_REWIND(data->element_value);
        RVAL2GRNBULK(key, data->context, data->element_value);

        grn_vector_add_element(data->context, data->vector,
                               GRN_BULK_HEAD(data->element_value),
                               GRN_BULK_VSIZE(data->element_value),
                               weight,
                               data->element_value->header.domain);
    }

    return ST_CONTINUE;
}
Ejemplo n.º 2
0
/*
 * It updates a value of variable size column value for the record
 * that ID is _id_.
 *
 * Weight vector column is a special variable size column. This
 * description describes only weight vector column. Other variable
 * size column works what you think.
 *
 * @example Use weight vector as matrix search result weight
 *    Groonga::Schema.define do |schema|
 *      schema.create_table("Products",
 *                          :type => :patricia_trie,
 *                          :key_type => "ShortText") do |table|
 *        # This is weight vector.
 *        # ":with_weight => true" is important for matrix search result weight.
 *        table.short_text("tags",
 *                         :type => :vector,
 *                         :with_weight => true)
 *      end
 *
 *      schema.create_table("Tags",
 *                          :type => :hash,
 *                          :key_type => "ShortText") do |table|
 *        # This is inverted index. It also needs ":with_weight => true".
 *        table.index("Products.tags", :with_weight => true)
 *      end
 *    end
 *
 *    products = Groonga["Products"]
 *    groonga = products.add("Groonga")
 *    groonga.tags = [
 *      {
 *        :value  => "groonga",
 *        :weight => 100,
 *      },
 *    ]
 *    rroonga = products.add("Rroonga")
 *    rroonga.tags = [
 *      {
 *        :value  => "ruby",
 *        :weight => 100,
 *      },
 *      {
 *        :value  => "groonga",
 *        :weight => 10,
 *      },
 *    ]
 *
 *    result = products.select do |record|
 *      # Search by "groonga"
 *      record.match("groonga") do |match_target|
 *        match_target.tags
 *      end
 *    end
 *
 *    result.each do |record|
 *      p [record.key.key, record.score]
 *    end
 *    # Matches all records with weight.
 *    # => ["Groonga", 101]
 *    #    ["Rroonga", 11]
 *
 *    # Increases score for "ruby" 10 times
 *    products.select(# The previous search result. Required.
 *                    :result => result,
 *                    # It just adds score to existing records in the result. Required.
 *                    :operator => Groonga::Operator::ADJUST) do |record|
 *      record.match("ruby") do |target|
 *        target.tags * 10 # 10 times
 *      end
 *    end
 *
 *    result.each do |record|
 *      p [record.key.key, record.score]
 *    end
 *    # Weight is used for increasing score.
 *    # => ["Groonga", 101]  <- Not changed.
 *    #    ["Rroonga", 1021] <- 1021 (= 101 * 10 + 1) increased.
 *
 * @overload []=(id, elements)
 *   This description is for weight vector column.
 *
 *   @param [Integer, Record] id The record ID.
 *   @param [Array<Hash<Symbol, String>>] elements An array of values
 *     for weight vector.
 *     Each value is a Hash like the following form:
 *
 *     <pre>
 *     {
 *       :value  => [KEY],
 *       :weight => [WEIGHT],
 *     }
 *     </pre>
 *
 *     @[KEY]@ must be the same type of the key of the table that is
 *     specified as range on creating the weight vector.
 *
 *     @[WEIGHT]@ must be an positive integer. Note that search
 *     becomes @weight + 1@. It means that You want to get 10 as
 *     score, you should set 9 as weight.
 *
 * @overload []=(id, value)
 *   This description is for variable size columns except weight
 *   vector column.
 *
 *   @param [Integer, Record] id The record ID.
 *   @param [::Object] value A new value.
 *   @see Groonga::Object#[]=
 *
 * @since 4.0.1
 */
static VALUE
rb_grn_variable_size_column_array_set (VALUE self, VALUE rb_id, VALUE rb_value)
{
    grn_ctx *context = NULL;
    grn_obj *column, *range;
    grn_rc rc;
    grn_id id;
    grn_obj *value, *element_value;
    int flags = GRN_OBJ_SET;

    rb_grn_variable_size_column_deconstruct(SELF(self), &column, &context,
                                            NULL, NULL, &value, &element_value,
                                            NULL, &range);

    if (!(column->header.flags & GRN_OBJ_WITH_WEIGHT)) {
        VALUE args[2];
        args[0] = rb_id;
        args[1] = rb_value;
        return rb_call_super(2, args);
    }

    id = RVAL2GRNID(rb_id, context, range, self);

    grn_obj_reinit(context, value,
                   value->header.domain,
                   value->header.flags | GRN_OBJ_VECTOR);
    value->header.flags |= GRN_OBJ_WITH_WEIGHT;
    if (RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cArray))) {
        int i, n;
        n = RARRAY_LEN(rb_value);
        for (i = 0; i < n; i++) {
            unsigned int weight = 0;
            VALUE rb_element_value, rb_weight;

            rb_grn_scan_options(RARRAY_PTR(rb_value)[i],
                                "value", &rb_element_value,
                                "weight", &rb_weight,
                                NULL);

            if (!NIL_P(rb_weight)) {
                weight = NUM2UINT(rb_weight);
            }

            if (value->header.type == GRN_UVECTOR) {
                grn_id id = RVAL2GRNID(rb_element_value, context, range, self);
                grn_uvector_add_element(context, value, id, weight);
            } else {
                GRN_BULK_REWIND(element_value);
                if (!NIL_P(rb_element_value)) {
                    RVAL2GRNBULK(rb_element_value, context, element_value);
                }

                grn_vector_add_element(context, value,
                                       GRN_BULK_HEAD(element_value),
                                       GRN_BULK_VSIZE(element_value),
                                       weight,
                                       element_value->header.domain);
            }
        }
    } else if (RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cHash))) {
        HashElementToVectorElementData data;
        data.self = self;
        data.context = context;
        data.vector = value;
        data.element_value = element_value;
        data.range = range;
        rb_hash_foreach(rb_value, hash_element_to_vector_element, (VALUE)&data);
    } else {
        rb_raise(rb_eArgError,
                 "<%s>: "
                 "weight vector value must be an array of index value or "
                 "a hash that key is vector value and value is vector weight: "
                 "<%s>",
                 rb_grn_inspect(self),
                 rb_grn_inspect(rb_value));
    }

    rc = grn_obj_set_value(context, column, id, value, flags);
    rb_grn_context_check(context, self);
    rb_grn_rc_check(rc, self);

    return rb_value;
}
Ejemplo n.º 3
0
static grn_obj *
command_tag_synonym(grn_ctx *ctx, GNUC_UNUSED int nargs, GNUC_UNUSED grn_obj **args,
                    GNUC_UNUSED grn_user_data *user_data)
{
  GNUC_UNUSED grn_obj *flags = grn_ctx_pop(ctx);
  grn_obj *newvalue = grn_ctx_pop(ctx);
  grn_obj *oldvalue = grn_ctx_pop(ctx);
  GNUC_UNUSED grn_obj *id = grn_ctx_pop(ctx);
  grn_obj buf;
  grn_obj record;
  grn_obj *domain;
  grn_obj *table;
  grn_obj *column;
  int i,n;

  if (GRN_BULK_VSIZE(newvalue) == 0 || GRN_INT32_VALUE(flags) == 0) {
    return NULL;
  }

  table = grn_ctx_at(ctx, oldvalue->header.domain);
  if (table && !is_table(table)) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING,
                   "[tag-synonym] "
                   "hooked column must be reference type");
    return NULL;
  }

  column = grn_obj_column(ctx,
                          table,
                          SYNONYM_COLUMN_NAME,
                          SYNONYM_COLUMN_NAME_LEN);
  if (!column) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING,
                   "[tag-synonym] "
                   "couldn't open synonym column");
    return NULL;
  }

  GRN_TEXT_INIT(&buf, 0);
  domain = grn_ctx_at(ctx, newvalue->header.domain);
  if (domain && is_string(domain)) {
    GRN_RECORD_INIT(&record, GRN_OBJ_VECTOR, oldvalue->header.domain);
    grn_table_tokenize(ctx, table, GRN_TEXT_VALUE(newvalue), GRN_TEXT_LEN(newvalue), &record, GRN_TRUE);
  } else if (newvalue->header.type == GRN_UVECTOR) {
    record = *newvalue;
  }

  if (is_string(domain) || newvalue->header.type == GRN_UVECTOR) {
    grn_obj value;

    GRN_RECORD_INIT(newvalue, GRN_OBJ_VECTOR, oldvalue->header.domain);
    GRN_UINT32_INIT(&value, 0);
    n = grn_vector_size(ctx, &record);
    for (i = 0; i < n; i++) {
      grn_id tid;
      tid = grn_uvector_get_element(ctx, &record, i, NULL);
      GRN_BULK_REWIND(&value);
      grn_obj_get_value(ctx, column, tid, &value);
      if (GRN_UINT32_VALUE(&value)) {
        GRN_PLUGIN_LOG(ctx, GRN_LOG_INFO,
                       "[tag-synonym] "
                       "changed: tid %d -> %d", tid, GRN_UINT32_VALUE(&value));
        tid = GRN_UINT32_VALUE(&value);
      }
      grn_uvector_add_element(ctx, newvalue, tid, 0);
    }
    grn_obj_unlink(ctx, &value);
  } else {
    grn_id tid;
    grn_obj value;
    tid = GRN_RECORD_VALUE(newvalue);
    GRN_UINT32_INIT(&value, 0);
    grn_obj_get_value(ctx, column, tid, &value);
    if (GRN_UINT32_VALUE(&value)) {
      GRN_PLUGIN_LOG(ctx, GRN_LOG_INFO,
                     "[tag-synonym] "
                     "changed: tid %d -> %d", tid, GRN_UINT32_VALUE(&value));
      tid = GRN_UINT32_VALUE(&value);
      GRN_BULK_REWIND(newvalue);
      GRN_RECORD_SET(ctx, newvalue, tid);
    }
    grn_obj_unlink(ctx, &value);
  }
  grn_obj_unlink(ctx, &buf);

  return NULL;
}
Ejemplo n.º 4
0
static grn_obj *
func_vector_slice(grn_ctx *ctx, int n_args, grn_obj **args,
                  grn_user_data *user_data)
{
  grn_obj *target;
  grn_obj *from_raw = NULL;
  grn_obj *length_raw = NULL;
  int64_t from = 0;
  int64_t length = -1;
  uint32_t to = 0;
  uint32_t size = 0;
  grn_obj *slice;

  if (n_args < 2 || n_args > 3) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "vector_slice(): wrong number of arguments (%d for 2..3)",
                     n_args);
    return NULL;
  }

  target = args[0];
  from_raw = args[1];
  if (n_args == 3) {
    length_raw = args[2];
  }
  switch (target->header.type) {
  case GRN_VECTOR :
  case GRN_PVECTOR :
  case GRN_UVECTOR :
    size = grn_vector_size(ctx, target);
    break;
  default :
    {
      grn_obj inspected;

      GRN_TEXT_INIT(&inspected, 0);
      grn_inspect(ctx, target, &inspected);
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "vector_slice(): target object must be vector: <%.*s>",
                       (int)GRN_TEXT_LEN(&inspected),
                       GRN_TEXT_VALUE(&inspected));
      GRN_OBJ_FIN(ctx, &inspected);
      return NULL;
    }
    break;
  }

  if (!grn_type_id_is_number_family(ctx, from_raw->header.domain)) {
    grn_obj inspected;

    GRN_TEXT_INIT(&inspected, 0);
    grn_inspect(ctx, &inspected, from_raw);
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "vector_slice(): from must be a number: <%.*s>",
                     (int)GRN_TEXT_LEN(&inspected),
                     GRN_TEXT_VALUE(&inspected));
    GRN_OBJ_FIN(ctx, &inspected);
    return NULL;
  }
  if (from_raw->header.domain == GRN_DB_INT32) {
    from = GRN_INT32_VALUE(from_raw);
  } else if (from_raw->header.domain == GRN_DB_INT64) {
    from = GRN_INT64_VALUE(from_raw);
  } else {
    grn_obj buffer;
    grn_rc rc;

    GRN_INT64_INIT(&buffer, 0);
    rc = grn_obj_cast(ctx, from_raw, &buffer, GRN_FALSE);
    if (rc == GRN_SUCCESS) {
      from = GRN_INT64_VALUE(&buffer);
    }
    GRN_OBJ_FIN(ctx, &buffer);

    if (rc != GRN_SUCCESS) {
      grn_obj inspected;

      GRN_TEXT_INIT(&inspected, 0);
      grn_inspect(ctx, &inspected, from_raw);
      GRN_PLUGIN_ERROR(ctx, rc,
                       "vector_slice(): "
                       "failed to cast from value to number: <%.*s>",
                       (int)GRN_TEXT_LEN(&inspected),
                       GRN_TEXT_VALUE(&inspected));
      GRN_OBJ_FIN(ctx, &inspected);
      return NULL;
    }
  }

  if (length_raw) {
    if (!grn_type_id_is_number_family(ctx, length_raw->header.domain)) {
      grn_obj inspected;

      GRN_TEXT_INIT(&inspected, 0);
      grn_inspect(ctx, &inspected, length_raw);
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "vector_slice(): length must be a number: <%.*s>",
                       (int)GRN_TEXT_LEN(&inspected),
                       GRN_TEXT_VALUE(&inspected));
      GRN_OBJ_FIN(ctx, &inspected);
      return NULL;
    }
    if (length_raw->header.domain == GRN_DB_INT32) {
      length = GRN_INT32_VALUE(length_raw);
    } else if (length_raw->header.domain == GRN_DB_INT64) {
      length = GRN_INT64_VALUE(length_raw);
    } else {
      grn_obj buffer;
      grn_rc rc;

      GRN_INT64_INIT(&buffer, 0);
      rc = grn_obj_cast(ctx, length_raw, &buffer, GRN_FALSE);
      if (rc == GRN_SUCCESS) {
        length = GRN_INT64_VALUE(&buffer);
      }
      GRN_OBJ_FIN(ctx, &buffer);

      if (rc != GRN_SUCCESS) {
        grn_obj inspected;

        GRN_TEXT_INIT(&inspected, 0);
        grn_inspect(ctx, &inspected, length_raw);
        GRN_PLUGIN_ERROR(ctx, rc,
                         "vector_slice(): "
                         "failed to cast length value to number: <%.*s>",
                         (int)GRN_TEXT_LEN(&inspected),
                         GRN_TEXT_VALUE(&inspected));
        GRN_OBJ_FIN(ctx, &inspected);
        return NULL;
      }
    }
  }

  slice = grn_plugin_proc_alloc(ctx, user_data, target->header.domain, GRN_OBJ_VECTOR);
  if (!slice) {
    return NULL;
  }

  if (target->header.flags & GRN_OBJ_WITH_WEIGHT) {
    slice->header.flags |= GRN_OBJ_WITH_WEIGHT;
  }

  if (length < 0) {
    length = size + length + 1;
  }

  if (length > size) {
    length = size;
  }

  if (length <= 0) {
    return slice;
  }

  while (from < 0) {
    from += size;
  }

  to = from + length;
  if (to > size) {
    to = size;
  }

  switch (target->header.type) {
  case GRN_VECTOR :
    {
      unsigned int i;
      for (i = from; i < to; i++) {
        const char *content;
        unsigned int content_length;
        unsigned int weight;
        grn_id domain;
        content_length = grn_vector_get_element(ctx, target, i,
                                                &content, &weight, &domain);
        grn_vector_add_element(ctx, slice,
                               content, content_length, weight, domain);
      }
    }
    break;
  case GRN_PVECTOR :
    {
      unsigned int i;
      for (i = from; i < to; i++) {
        grn_obj *element = GRN_PTR_VALUE_AT(target, i);
        GRN_PTR_PUT(ctx, slice, element);
      }
    }
    break;
  case GRN_UVECTOR :
    {
      unsigned int i;
      for (i = from; i < to; i++) {
        grn_id id;
        unsigned int weight;
        id = grn_uvector_get_element(ctx, target, i, &weight);
        grn_uvector_add_element(ctx, slice, id, weight);
      }
    }
    break;
  }

  return slice;
}