/*
 * Expand every "${DATASET}" placeholder found in `command` with
 * `dataset_name`, echo the expanded command to stdout prefixed by "> ",
 * send it on `ctx` and then call output(ctx).
 *
 * ctx          - context the command is sent on; the function returns
 *                immediately when ctx->rc is not GRN_SUCCESS on entry.
 * buffer       - scratch text bulk; it is rewound first and afterwards
 *                holds the expanded command followed by "\n".
 * command      - NUL-terminated command template.
 * dataset_name - NUL-terminated text substituted for each placeholder.
 */
static void
send_command(grn_ctx *ctx, grn_obj *buffer, const char *command,
             const char *dataset_name)
{
    const char *place_holder = "${DATASET}";
    const char *rest;
    char *found;

    if (ctx->rc != GRN_SUCCESS) {
        return;
    }

    GRN_BULK_REWIND(buffer);
    rest = command;
    for (;;) {
        found = strstr(rest, place_holder);
        if (!found) {
            break;
        }
        /* Literal text in front of the placeholder, then its replacement. */
        GRN_TEXT_PUT(ctx, buffer, rest, found - rest);
        GRN_TEXT_PUTS(ctx, buffer, dataset_name);
        rest = found + strlen(place_holder);
    }
    /* Whatever follows the last placeholder, plus the terminating newline. */
    GRN_TEXT_PUTS(ctx, buffer, rest);
    GRN_TEXT_PUTS(ctx, buffer, "\n");

    printf("> %.*s", (int)GRN_TEXT_LEN(buffer), GRN_TEXT_VALUE(buffer));
    grn_ctx_send(ctx, GRN_TEXT_VALUE(buffer), GRN_TEXT_LEN(buffer), 0);
    output(ctx);
}
Пример #2
0
/*
 * Build a full-text search by hand as a grn_expr and check its JSON output.
 *
 * Set-up: a key-less table "t1" with a text column "c1", a patricia-trie
 * lexicon "lc" using the bigram tokenizer, and a positional index column
 * "ft" on the lexicon whose source is "c1".  Four rows are inserted.
 *
 * Expression: create a hash-key result table (with sub records) from "t1",
 * store it in the expression variable, search "ft" for "hij" into that
 * variable with GRN_SEL_OR, then emit ".c1 .:score" through GRN_OP_JSON_PUT
 * into `textbuf`.
 *
 * Every stack bulk initialised here (`textbuf`, `intbuf`) is closed before
 * the function returns.
 */
void
test_expr_query(void)
{
  grn_obj *t1, *c1, *lc, *ft, *v, *expr;
  grn_obj textbuf, intbuf;
  grn_id r1, r2, r3, r4;

  /* actual table */
  t1 = grn_table_create(&context, "t1", 2, NULL,
                        GRN_OBJ_TABLE_NO_KEY|GRN_OBJ_PERSISTENT, NULL, 0);
  cut_assert_not_null(t1);

  /* lexicon table */
  lc = grn_table_create(&context, "lc", 2, NULL,
                        GRN_OBJ_TABLE_PAT_KEY|GRN_OBJ_PERSISTENT,
                        grn_ctx_at(&context, GRN_DB_SHORTTEXT), 0);
  cut_assert_not_null(lc);
  grn_test_assert(grn_obj_set_info(&context, lc, GRN_INFO_DEFAULT_TOKENIZER,
                                   grn_ctx_at(&context, GRN_DB_BIGRAM)));

  /* actual column */
  c1 = grn_column_create(&context, t1, "c1", 2, NULL,
                         GRN_OBJ_COLUMN_SCALAR|GRN_OBJ_PERSISTENT,
                         grn_ctx_at(&context, GRN_DB_TEXT));
  cut_assert_not_null(c1);

  /* fulltext index */
  ft = grn_column_create(&context, lc, "ft", 2, NULL,
                         GRN_OBJ_COLUMN_INDEX|GRN_OBJ_PERSISTENT|GRN_OBJ_WITH_POSITION, t1);
  cut_assert_not_null(ft);

  GRN_TEXT_INIT(&textbuf, 0);
  GRN_UINT32_INIT(&intbuf, 0);

  /* link between actual column and fulltext index */
  GRN_UINT32_SET(&context, &intbuf, grn_obj_id(&context, c1));
  grn_obj_set_info(&context, ft, GRN_INFO_SOURCE, &intbuf); /* need to use grn_id */

  /* insert row */
  r1 = grn_table_add(&context, t1, NULL, 0, NULL);
  cut_assert_equal_int(1, r1);
  GRN_TEXT_SETS(&context, &textbuf, "abhij");
  grn_test_assert(grn_obj_set_value(&context, c1, r1, &textbuf, GRN_OBJ_SET));

  r2 = grn_table_add(&context, t1, NULL, 0, NULL);
  cut_assert_equal_int(2, r2);
  GRN_TEXT_SETS(&context, &textbuf, "fghij");
  grn_test_assert(grn_obj_set_value(&context, c1, r2, &textbuf, GRN_OBJ_SET));

  r3 = grn_table_add(&context, t1, NULL, 0, NULL);
  cut_assert_equal_int(3, r3);
  GRN_TEXT_SETS(&context, &textbuf, "11 22 33");
  grn_test_assert(grn_obj_set_value(&context, c1, r3, &textbuf, GRN_OBJ_SET));

  r4 = grn_table_add(&context, t1, NULL, 0, NULL);
  cut_assert_equal_int(4, r4);
  GRN_TEXT_SETS(&context, &textbuf, "44 22 55");
  grn_test_assert(grn_obj_set_value(&context, c1, r4, &textbuf, GRN_OBJ_SET));

  /* confirm record are inserted in both column and index */
  cut_assert_equal_int(4, grn_table_size(&context, t1));
  cut_assert_equal_int(17, grn_table_size(&context, lc));

  cut_assert_not_null((expr = grn_expr_create(&context, NULL, 0)));

  v = grn_expr_add_var(&context, expr, NULL, 0);

  /* GRN_OP_TABLE_CREATE takes 4 operands: an empty name, the table flags,
     the key source table and a zero constant. */
  GRN_BULK_REWIND(&textbuf);
  grn_expr_append_const(&context, expr, &textbuf);
  GRN_UINT32_SET(&context, &intbuf, GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC);
  grn_expr_append_const(&context, expr, &intbuf);
  grn_expr_append_obj(&context, expr, t1);
  GRN_UINT32_SET(&context, &intbuf, 0);
  grn_expr_append_const(&context, expr, &intbuf);
  grn_expr_append_op(&context, expr, GRN_OP_TABLE_CREATE, 4);

  /* store the created table in the expression variable */
  grn_expr_append_obj(&context, expr, v);
  grn_expr_append_op(&context, expr, GRN_OP_VAR_SET_VALUE, 2);

  /* search the index for "hij", merging hits into the variable */
  grn_expr_append_obj(&context, expr, ft);
  GRN_TEXT_SETS(&context, &textbuf, "hij");
  grn_expr_append_const(&context, expr, &textbuf);
  grn_expr_append_obj(&context, expr, v);
  GRN_UINT32_SET(&context, &intbuf, GRN_SEL_OR);
  grn_expr_append_const(&context, expr, &intbuf);
  grn_expr_append_op(&context, expr, GRN_OP_OBJ_SEARCH, 4);

  /* emit the result as JSON; textbuf itself is appended as the last
     operand and is the buffer inspected by the assertion below */
  grn_expr_append_obj(&context, expr, v);
  GRN_TEXT_SETS(&context, &textbuf, ".c1 .:score");
  grn_expr_append_const(&context, expr, &textbuf);
  GRN_BULK_REWIND(&textbuf);
  grn_expr_append_obj(&context, expr, &textbuf);
  grn_expr_append_op(&context, expr, GRN_OP_JSON_PUT, 3);

  grn_expr_compile(&context, expr);

  grn_expr_exec(&context, expr);

  cut_assert_equal_uint(0, grn_obj_close(&context, expr));

  cut_assert_equal_substring("[[\"abhij\", 1], [\"fghij\", 1]]",
                             GRN_TEXT_VALUE(&textbuf), GRN_TEXT_LEN(&textbuf));

  grn_obj_close(&context, &textbuf);
  /* intbuf was initialised alongside textbuf and must be released too */
  grn_obj_close(&context, &intbuf);
  grn_obj_close(&context, ft);
  grn_obj_close(&context, c1);
  grn_obj_close(&context, lc);
  grn_obj_close(&context, t1);
}
Пример #3
0
void
test_persistent_expr(void)
{
  int i;
  grn_obj *t1, *t2, *c1, *c2, buf;
  t1 = grn_table_create(&context, "t1", 2, NULL,
                        GRN_OBJ_TABLE_NO_KEY|GRN_OBJ_PERSISTENT, NULL, 0);
  cut_assert_not_null(t1);
  t2 = grn_table_create(&context, "t2", 2, NULL,
                        GRN_OBJ_TABLE_NO_KEY|GRN_OBJ_PERSISTENT, NULL, 0);
  cut_assert_not_null(t2);
  c1 = grn_column_create(&context, t1, "c1", 2, NULL,
                         GRN_OBJ_PERSISTENT, t2);
  cut_assert_not_null(c1);
  c2 = grn_column_create(&context, t2, "c2", 2, NULL,
                         GRN_OBJ_PERSISTENT, t1);
  cut_assert_not_null(c2);
  GRN_TEXT_INIT(&buf, 0);
  for (i = 0; i < NRECORDS; i++) {
    grn_id i1, i2;
    i1 = grn_table_add(&context, t1, NULL, 0, NULL);
    i2 = grn_table_add(&context, t2, NULL, 0, NULL);
    GRN_BULK_REWIND(&buf);
    grn_bulk_write(&context, &buf, (char *)&i2, sizeof(grn_id));
    grn_obj_set_value(&context, c1, i1, &buf, GRN_OBJ_SET);
    grn_obj_set_value(&context, c2, i2, &buf, GRN_OBJ_SET);
  }
  {
    grn_obj *expr = grn_expr_create(&context, "test", 4);
    grn_obj *v;
    cut_assert_not_null(expr);
    v = grn_expr_add_var(&context, expr, "foo", 3);
    GRN_RECORD_INIT(v, 0, grn_obj_id(&context, t1));
    grn_expr_append_obj(&context, expr, v);

    GRN_TEXT_SETS(&context, &buf, "c1");
    grn_expr_append_const(&context, expr, &buf);
    grn_expr_append_op(&context, expr, GRN_OP_OBJ_GET_VALUE, 2);
    GRN_TEXT_SETS(&context, &buf, "c2");
    grn_expr_append_const(&context, expr, &buf);
    grn_expr_append_op(&context, expr, GRN_OP_OBJ_GET_VALUE, 2);
    GRN_TEXT_SETS(&context, &buf, "c1");
    grn_expr_append_const(&context, expr, &buf);

    /*
    GRN_TEXT_SETS(&context, &buf, "c1.c2.c1");
    grn_expr_append_const(&context, expr, &buf);
    */

    grn_expr_append_op(&context, expr, GRN_OP_OBJ_GET_VALUE, 2);
    grn_expr_compile(&context, expr);
  }
  cut_assert_equal_uint(0, grn_obj_close(&context, &buf));

  grn_db_close(&context, database);
  database = grn_db_open(&context, path);

  GRN_TEXT_INIT(&buf, 0);

  {
    grn_id id;
    uint64_t et;
    int nerr = 0;
    grn_obj *r, *v;
    grn_table_cursor *tc;
    struct timeval tvb, tve;
    grn_obj *expr = grn_ctx_get(&context, "test", 4);
    v = grn_expr_get_var(&context, expr, "foo", 3);
    t1 = grn_ctx_get(&context, "t1", 2);
    tc = grn_table_cursor_open(&context, t1, NULL, 0, NULL, 0, 0);
    cut_assert_not_null(tc);
    gettimeofday(&tvb, NULL);
    while ((id = grn_table_cursor_next(&context, tc))) {
      GRN_RECORD_SET(&context, v, id);
      r = grn_expr_exec(&context, expr);
      if (GRN_RECORD_VALUE(r) != id) { nerr++; }
    }
    gettimeofday(&tve, NULL);
    et = (tve.tv_sec - tvb.tv_sec) * 1000000 + (tve.tv_usec - tvb.tv_usec);
    // printf("et=%zu\n", et);
    cut_assert_equal_uint(0, nerr);
    cut_assert_equal_uint(0, grn_table_cursor_close(&context, tc));
  }
  cut_assert_equal_uint(0, grn_obj_close(&context, &buf));
}
Пример #4
0
/*
 * Data-driven test: sort the geo table by distance from a base point and
 * compare the resulting order with the expected key list.
 *
 * data - test data providing "base" (string cast to a WGS84 geo point),
 *        "offset" and "limit" (passed to grn_table_sort) and "expected"
 *        (list of strings compared with the inspected result points).
 *
 * Cases with offset > 0 are omitted.  The omit check runs before any
 * function-local bulk is initialised so an omitted case leaves nothing
 * un-finalised (assumes cut_omit() leaves the test immediately - confirm
 * against the Cutter reference).
 */
void
test_near_geo_point(gpointer data)
{
  grn_id id;
  int offset, limit;
  const GList *expected_keys;
  GList *actual_keys = NULL;
  grn_table_sort_key keys[2];
  grn_obj base, base_string, location;

  create_geo_table(cut_take_printf(" [\"%s\"],\n"
                                   " [\"%s\"],\n"
                                   " [\"%s\"],\n"
                                   " [\"%s\"],\n"
                                   " [\"%s\"],\n"
                                   " [\"%s\"],\n"
                                   " [\"%s\"],\n"
                                   " [\"%s\"],\n"
                                   " [\"%s\"],\n"
                                   " [\"%s\"],\n"
                                   " [\"%s\"],\n"
                                   " [\"%s\"],\n"
                                   " [\"%s\"],\n"
                                   " [\"%s\"],\n"
                                   " [\"%s\"]",
                                   TAKEN_POINT(0, 0, 0,
                                               180, 0, 0),
                                   TAKEN_POINT(0, 0, 0,
                                               -179, -59, -59),
                                   TAKEN_POINT(-1, -1, -1,
                                               180, 0, 0),
                                   TAKEN_POINT(2, 1, 1,
                                               180, 0, 0),
                                   TAKEN_POINT(-2, -1, -1,
                                               -179, -59, -59),
                                   TAKEN_POINT(1, 2, 1,
                                               -179, -59, -59),
                                   TAKEN_POINT(90, 0, 0,
                                               0, 0, 0),
                                   TAKEN_POINT(-90, 0, 0,
                                               1, 0, 0),
                                   TAKEN_POINT(1, 0, 0,
                                               1, 0, 0),
                                   TAKEN_POINT(1, 1, 0,
                                               1, 1, 0),
                                   TAKEN_POINT(1, 1, 1,
                                               1, 1, 1),
                                   TAKEN_POINT(-1, 0, 0,
                                               1, 1, 1),
                                   TAKEN_POINT(-1, -1, -1,
                                               0, 0, 0),
                                   TAKEN_POINT(-1, -2, -1,
                                               -1, -1, -1),
                                   TAKEN_POINT(1, 1, 10,
                                               -1, -1, -1)));

  /* Anonymous key-less table that receives the sorted record ids. */
  result = grn_table_create(context, NULL, 0, NULL, GRN_TABLE_NO_KEY,
                            NULL, table);
  grn_test_assert_context(context);

  offset = gcut_data_get_int(data, "offset");
  if (offset > 0) {
    cut_omit("geo sort doesn't support offset yet.");
  }
  limit = gcut_data_get_int(data, "limit");

  /* Convert the textual base point into a geo point bulk. */
  GRN_TEXT_INIT(&base_string, 0);
  GRN_TEXT_SETS(context, &base_string, gcut_data_get_string(data, "base"));
  GRN_WGS84_GEO_POINT_INIT(&base, 0);
  grn_obj_cast(context, &base_string, &base, FALSE);
  GRN_OBJ_FIN(context, &base_string);

  /* keys[0] is the column to sort on, keys[1] carries the base point. */
  keys[0].key = column;
  keys[0].flags = GRN_TABLE_SORT_GEO;
  keys[0].offset = 0;
  keys[1].key = &base;
  keys[1].flags = 0;
  keys[1].offset = 0;
  grn_table_sort(context, table, offset, limit, result, keys, 2);
  GRN_OBJ_FIN(context, &base);
  grn_test_assert_context(context);
  cursor = grn_table_cursor_open(context, result,
                                 NULL, 0, NULL, 0, 0, -1,
                                 GRN_CURSOR_ASCENDING);
  grn_test_assert_context(context);
  GRN_WGS84_GEO_POINT_INIT(&location, 0);
  while ((id = grn_table_cursor_next(context, cursor))) {
    gint32 *key;
    gint latitude, longitude;

    /* The value stored in `result` is used as the record id in `table`. */
    grn_table_cursor_get_value(context, cursor, (void **)&key);
    GRN_BULK_REWIND(&location);
    grn_obj_get_value(context, column, *key, &location);
    GRN_GEO_POINT_VALUE(&location, latitude, longitude);
    actual_keys = g_list_append(actual_keys,
                                inspect_point(latitude, longitude));
  }
  GRN_OBJ_FIN(context, &location);
  gcut_take_list(actual_keys, g_free);

  expected_keys = gcut_data_get_pointer(data, "expected");
  gcut_assert_equal_list_string(expected_keys, actual_keys);
}
Пример #5
0
/*
 * Compare two objects for equality.
 *
 * Only GRN_BULK, GRN_VECTOR and GRN_UVECTOR values of `x` are handled, and
 * `y` must have the same header type as `x`; every other combination yields
 * GRN_FALSE.
 *
 * - GRN_BULK:    the result is whatever DO_EQ() stores for (x, y).
 * - GRN_VECTOR:  sizes must match; elements are compared pairwise, first by
 *                weight, then by DO_EQ() on bulks rebuilt from each
 *                element's domain and bytes.
 * - GRN_UVECTOR: sizes must match; elements are compared pairwise by DO_EQ()
 *                on bulks carrying the respective uvector's domain.
 *
 * Returns GRN_TRUE when the objects compare equal, GRN_FALSE otherwise.
 */
static grn_bool
exec_equal(grn_ctx *ctx, grn_obj *x, grn_obj *y)
{
  switch (x->header.type) {
  case GRN_BULK :
    {
      grn_bool is_equal = GRN_FALSE;

      if (y->header.type != GRN_BULK) {
        return GRN_FALSE;
      }
      DO_EQ(x, y, is_equal);
      return is_equal;
    }
  case GRN_VECTOR :
    {
      grn_bool is_equal = GRN_TRUE;
      unsigned int n_x_elements;
      unsigned int n_y_elements;
      unsigned int i;
      grn_obj x_element;
      grn_obj y_element;

      if (y->header.type != GRN_VECTOR) {
        return GRN_FALSE;
      }
      n_x_elements = grn_vector_size(ctx, x);
      n_y_elements = grn_vector_size(ctx, y);
      if (n_x_elements != n_y_elements) {
        return GRN_FALSE;
      }

      GRN_VOID_INIT(&x_element);
      GRN_VOID_INIT(&y_element);
      for (i = 0; i < n_x_elements; i++) {
        const char *x_value;
        const char *y_value;
        unsigned int x_length;
        unsigned int y_length;
        unsigned int x_weight;
        unsigned int y_weight;
        grn_id x_domain;
        grn_id y_domain;

        x_length = grn_vector_get_element(ctx, x, i,
                                          &x_value, &x_weight, &x_domain);
        y_length = grn_vector_get_element(ctx, y, i,
                                          &y_value, &y_weight, &y_domain);
        if (x_weight != y_weight) {
          is_equal = GRN_FALSE;
          break;
        }
        /* Rebuild each element as a bulk of its own domain for DO_EQ(). */
        grn_obj_reinit(ctx, &x_element, x_domain, 0);
        grn_bulk_write(ctx, &x_element, x_value, x_length);
        grn_obj_reinit(ctx, &y_element, y_domain, 0);
        grn_bulk_write(ctx, &y_element, y_value, y_length);
        DO_EQ((&x_element), (&y_element), is_equal);
        if (!is_equal) {
          break;
        }
      }
      GRN_OBJ_FIN(ctx, &x_element);
      GRN_OBJ_FIN(ctx, &y_element);
      return is_equal;
    }
  case GRN_UVECTOR :
    {
      grn_bool is_equal = GRN_TRUE;
      unsigned int n_x_elements;
      unsigned int n_y_elements;
      unsigned int x_element_size;
      unsigned int y_element_size;
      unsigned int i;
      grn_obj x_element;
      grn_obj y_element;

      if (y->header.type != GRN_UVECTOR) {
        return GRN_FALSE;
      }
      n_x_elements = grn_vector_size(ctx, x);
      n_y_elements = grn_vector_size(ctx, y);
      x_element_size = grn_uvector_element_size(ctx, x);
      y_element_size = grn_uvector_element_size(ctx, y);
      if (n_x_elements != n_y_elements) {
        return GRN_FALSE;
      }

      GRN_OBJ_INIT(&x_element, GRN_BULK, 0, x->header.domain);
      GRN_OBJ_INIT(&y_element, GRN_BULK, 0, y->header.domain);
      for (i = 0; i < n_x_elements; i++) {
        /* Elements are laid out back to back; index by element size. */
        const char *x_value = GRN_BULK_HEAD(x) + (x_element_size * i);
        const char *y_value = GRN_BULK_HEAD(y) + (y_element_size * i);

        GRN_BULK_REWIND(&x_element);
        GRN_BULK_REWIND(&y_element);
        grn_bulk_write(ctx, &x_element, x_value, x_element_size);
        grn_bulk_write(ctx, &y_element, y_value, y_element_size);
        DO_EQ((&x_element), (&y_element), is_equal);
        if (!is_equal) {
          break;
        }
      }
      GRN_OBJ_FIN(ctx, &x_element);
      GRN_OBJ_FIN(ctx, &y_element);
      return is_equal;
    }
  default :
    return GRN_FALSE;
  }
}
Пример #6
0
/*
 * Serialise `obj` into `outbuf` using the given output type.
 *
 * ctx         - context used for all calls and error reporting.
 * outbuf      - destination buffer.
 * output_type - content type forwarded to the grn_output_* helpers.
 * obj         - value to print; dispatch is on obj->header.type.
 * format      - optional column format; NULL selects the plain
 *               representation of each type.
 *
 * Handling per header type:
 * - GRN_BULK:    built-in domains go to the matching grn_output_* helper
 *                (an empty bulk prints as 0 / NULL).  Any other domain is
 *                treated as a record id: with `format` the listed columns
 *                are printed, otherwise the record's "_key" (or the raw id
 *                for key-less or unresolved tables).
 * - GRN_UVECTOR: with `format` a result set of the listed columns;
 *                otherwise an array of fixed-size values (when the domain
 *                is a GRN_TYPE) or of keys / ids.
 * - GRN_VECTOR:  an array of elements; not supported with `format`.
 * - GRN_PVECTOR: an array of the pointed-to objects; not supported with
 *                `format`.
 * - tables:      with `format` a result set read through a cursor limited
 *                by format->offset / format->limit; otherwise the escaped
 *                "_key" of every record.
 * Other header types produce no output.
 */
void
grn_output_obj(grn_ctx *ctx, grn_obj *outbuf, grn_content_type output_type,
               grn_obj *obj, grn_obj_format *format)
{
  grn_obj buf;
  GRN_TEXT_INIT(&buf, 0);
  switch (obj->header.type) {
  case GRN_BULK :
    switch (obj->header.domain) {
    case GRN_DB_VOID :
      grn_output_void(ctx, outbuf, output_type, GRN_BULK_HEAD(obj), GRN_BULK_VSIZE(obj));
      break;
    case GRN_DB_SHORT_TEXT :
    case GRN_DB_TEXT :
    case GRN_DB_LONG_TEXT :
      grn_output_str(ctx, outbuf, output_type, GRN_BULK_HEAD(obj), GRN_BULK_VSIZE(obj));
      break;
    case GRN_DB_BOOL :
      grn_output_bool(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_UINT8_VALUE(obj) : 0);
      break;
    case GRN_DB_INT8 :
      grn_output_int32(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_INT8_VALUE(obj) : 0);
      break;
    case GRN_DB_UINT8 :
      grn_output_int32(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_UINT8_VALUE(obj) : 0);
      break;
    case GRN_DB_INT16 :
      grn_output_int32(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_INT16_VALUE(obj) : 0);
      break;
    case GRN_DB_UINT16 :
      grn_output_int32(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_UINT16_VALUE(obj) : 0);
      break;
    case GRN_DB_INT32 :
      grn_output_int32(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_INT32_VALUE(obj) : 0);
      break;
    case GRN_DB_UINT32 :
      /* UInt32 does not fit in int32, so it is printed as int64. */
      grn_output_int64(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_UINT32_VALUE(obj) : 0);
      break;
    case GRN_DB_INT64 :
      grn_output_int64(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_INT64_VALUE(obj) : 0);
      break;
    case GRN_DB_UINT64 :
      grn_output_uint64(ctx, outbuf, output_type,
                        GRN_BULK_VSIZE(obj) ? GRN_UINT64_VALUE(obj) : 0);
      break;
    case GRN_DB_FLOAT :
      grn_output_float(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_FLOAT_VALUE(obj) : 0);
      break;
    case GRN_DB_TIME :
      grn_output_time(ctx, outbuf, output_type,
                      GRN_BULK_VSIZE(obj) ? GRN_INT64_VALUE(obj) : 0);
      break;
    case GRN_DB_TOKYO_GEO_POINT :
    case GRN_DB_WGS84_GEO_POINT :
      grn_output_geo_point(ctx, outbuf, output_type,
                           GRN_BULK_VSIZE(obj) ? (grn_geo_point *)GRN_BULK_HEAD(obj) : NULL);
      break;
    default :
      /* Any other domain: the bulk is handled as a record reference. */
      if (format) {
        int j;
        int ncolumns = GRN_BULK_VSIZE(&format->columns)/sizeof(grn_obj *);
        grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
        if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
          grn_output_array_open(ctx, outbuf, output_type, "COLUMNS", ncolumns);
          for (j = 0; j < ncolumns; j++) {
            grn_id range_id;
            grn_output_array_open(ctx, outbuf, output_type, "COLUMN", 2);
            GRN_BULK_REWIND(&buf);
            grn_column_name_(ctx, columns[j], &buf);
            grn_output_obj(ctx, outbuf, output_type, &buf, NULL);
            /* column range */
            range_id = grn_obj_get_range(ctx, columns[j]);
            if (range_id == GRN_ID_NIL) {
              GRN_TEXT_PUTS(ctx, outbuf, "null");
            } else {
              int name_len;
              grn_obj *range_obj;
              char name_buf[GRN_TABLE_MAX_KEY_SIZE];

              range_obj = grn_ctx_at(ctx, range_id);
              name_len = grn_obj_name(ctx, range_obj, name_buf,
                                      GRN_TABLE_MAX_KEY_SIZE);
              GRN_BULK_REWIND(&buf);
              GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
              grn_output_obj(ctx, outbuf, output_type, &buf, NULL);
            }
            grn_output_array_close(ctx, outbuf, output_type);
          }
          grn_output_array_close(ctx, outbuf, output_type);
        }
        grn_output_array_open(ctx, outbuf, output_type, "HIT", ncolumns);
        for (j = 0; j < ncolumns; j++) {
          grn_text_atoj_o(ctx, outbuf, output_type, columns[j], obj);
        }
        grn_output_array_close(ctx, outbuf, output_type);
      } else {
        grn_obj *table = grn_ctx_at(ctx, obj->header.domain);
        grn_id id = *((grn_id *)GRN_BULK_HEAD(obj));
        if (table && table->header.type != GRN_TABLE_NO_KEY) {
          /* Print the record's key; buf stays empty if "_key" is missing. */
          grn_obj *accessor = grn_obj_column(ctx, table, "_key", 4);
          if (accessor) {
            grn_obj_get_value(ctx, accessor, id, &buf);
            grn_obj_unlink(ctx, accessor);
          }
          grn_output_obj(ctx, outbuf, output_type, &buf, format);
        } else {
          grn_output_int64(ctx, outbuf, output_type, id);
        }
      }
      break;
    }
    break;
  case GRN_UVECTOR :
    if (format) {
      int j;
      grn_id *v = (grn_id *)GRN_BULK_HEAD(obj), *ve = (grn_id *)GRN_BULK_CURR(obj);
      int ncolumns = GRN_BULK_VSIZE(&format->columns) / sizeof(grn_obj *);
      grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
      grn_output_array_open(ctx, outbuf, output_type, "RESULTSET", -1);
      grn_output_array_open(ctx, outbuf, output_type, "NHITS", 1);
      grn_text_itoa(ctx, outbuf, ve - v);
      grn_output_array_close(ctx, outbuf, output_type);
      if (v < ve) {
        if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
          grn_output_array_open(ctx, outbuf, output_type, "COLUMNS", -1);
          for (j = 0; j < ncolumns; j++) {
            grn_id range_id;
            grn_output_array_open(ctx, outbuf, output_type, "COLUMN", -1);
            GRN_BULK_REWIND(&buf);
            grn_column_name_(ctx, columns[j], &buf);
            grn_output_obj(ctx, outbuf, output_type, &buf, NULL);
            /* column range */
            range_id = grn_obj_get_range(ctx, columns[j]);
            if (range_id == GRN_ID_NIL) {
              GRN_TEXT_PUTS(ctx, outbuf, "null");
            } else {
              int name_len;
              grn_obj *range_obj;
              char name_buf[GRN_TABLE_MAX_KEY_SIZE];

              range_obj = grn_ctx_at(ctx, range_id);
              name_len = grn_obj_name(ctx, range_obj, name_buf,
                                      GRN_TABLE_MAX_KEY_SIZE);
              GRN_BULK_REWIND(&buf);
              GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
              grn_output_obj(ctx, outbuf, output_type, &buf, NULL);
            }
            grn_output_array_close(ctx, outbuf, output_type);
          }
          grn_output_array_close(ctx, outbuf, output_type);
        }
        /* One "HITS" array per id, holding the value of every column. */
        do {
          grn_output_array_open(ctx, outbuf, output_type, "HITS", -1);
          for (j = 0; j < ncolumns; j++) {
            GRN_BULK_REWIND(&buf);
            grn_obj_get_value(ctx, columns[j], *v, &buf);
            grn_output_obj(ctx, outbuf, output_type, &buf, NULL);
          }
          grn_output_array_close(ctx, outbuf, output_type);
          v++;
        } while (v < ve);
      }
      grn_output_array_close(ctx, outbuf, output_type);
    } else {
      grn_obj *range = grn_ctx_at(ctx, obj->header.domain);
      if (range && range->header.type == GRN_TYPE) {
        /* Fixed-size values: the element size is taken from the type. */
        int value_size = ((struct _grn_type *)range)->obj.range;
        char *v = (char *)GRN_BULK_HEAD(obj),
             *ve = (char *)GRN_BULK_CURR(obj);
        grn_output_array_open(ctx, outbuf, output_type, "VECTOR", -1);
        if (v < ve) {
          do {
            grn_obj value;
            GRN_OBJ_INIT(&value, GRN_BULK, 0, obj->header.domain);
            grn_bulk_write_from(ctx, &value, v, 0, value_size);
            grn_output_obj(ctx, outbuf, output_type, &value, NULL);
            /* Release the per-element bulk before the next iteration. */
            GRN_OBJ_FIN(ctx, &value);

            v += value_size;
          } while (v < ve);
        }
        grn_output_array_close(ctx, outbuf, output_type);
      } else {
        grn_id *v = (grn_id *)GRN_BULK_HEAD(obj),
               *ve = (grn_id *)GRN_BULK_CURR(obj);
        /* `range` is NULL here when the domain could not be resolved; in
           that case fall back to printing the raw ids instead of
           dereferencing it. */
        int with_key = (range && range->header.type != GRN_TABLE_NO_KEY);
        grn_output_array_open(ctx, outbuf, output_type, "VECTOR", ve - v);
        if (v < ve) {
          grn_obj key;
          GRN_OBJ_INIT(&key, GRN_BULK, 0,
                       (range ? range->header.domain : GRN_DB_VOID));
          do {
            if (with_key) {
              grn_table_get_key2(ctx, range, *v, &key);
              grn_output_obj(ctx, outbuf, output_type, &key, NULL);
              GRN_BULK_REWIND(&key);
            } else {
              grn_obj id;
              GRN_UINT32_INIT(&id, 0);
              GRN_UINT32_SET(ctx, &id, *v);
              grn_output_obj(ctx, outbuf, output_type, &id, NULL);
              GRN_OBJ_FIN(ctx, &id);
            }
            v++;
          } while (v < ve);
          GRN_OBJ_FIN(ctx, &key);
        }
        grn_output_array_close(ctx, outbuf, output_type);
      }
    }
    break;
  case GRN_VECTOR :
    /* The error is reported, but processing continues below. */
    if (obj->header.domain == GRN_DB_VOID) {
      ERR(GRN_INVALID_ARGUMENT, "invalid obj->header.domain");
    }
    if (format) {
      ERR(GRN_FUNCTION_NOT_IMPLEMENTED,
          "cannot print GRN_VECTOR using grn_obj_format");
    } else {
      unsigned int i, n;
      grn_obj value;
      GRN_VOID_INIT(&value);
      n = grn_vector_size(ctx, obj);
      grn_output_array_open(ctx, outbuf, output_type, "VECTOR", -1);
      for (i = 0; i < n; i++) {
        const char *_value;
        unsigned int weight, length;
        grn_id domain;

        length = grn_vector_get_element(ctx, obj, i,
                                        &_value, &weight, &domain);
        /* An element without its own domain inherits the vector's. */
        if (domain != GRN_DB_VOID) {
          grn_obj_reinit(ctx, &value, domain, 0);
        } else {
          grn_obj_reinit(ctx, &value, obj->header.domain, 0);
        }
        grn_bulk_write(ctx, &value, _value, length);
        grn_output_obj(ctx, outbuf, output_type, &value, NULL);
      }
      grn_output_array_close(ctx, outbuf, output_type);
      GRN_OBJ_FIN(ctx, &value);
    }
    break;
  case GRN_PVECTOR :
    if (format) {
      ERR(GRN_FUNCTION_NOT_IMPLEMENTED,
          "cannot print GRN_PVECTOR using grn_obj_format");
    } else {
      unsigned int i, n;
      grn_output_array_open(ctx, outbuf, output_type, "VECTOR", -1);
      n = GRN_BULK_VSIZE(obj) / sizeof(grn_obj *);
      for (i = 0; i < n; i++) {
        grn_obj *value;

        value = GRN_PTR_VALUE_AT(obj, i);
        grn_output_obj(ctx, outbuf, output_type, value, NULL);
      }
      grn_output_array_close(ctx, outbuf, output_type);
    }
    break;
  case GRN_TABLE_HASH_KEY :
  case GRN_TABLE_PAT_KEY :
  case GRN_TABLE_NO_KEY :
  case GRN_TABLE_VIEW :
    if (format) {
      int i, j;
      int ncolumns = GRN_BULK_VSIZE(&format->columns)/sizeof(grn_obj *);
      grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
      grn_table_cursor *tc = grn_table_cursor_open(ctx, obj, NULL, 0, NULL, 0,
                                                   format->offset, format->limit,
                                                   GRN_CURSOR_ASCENDING);
      int resultset_size = -1;
      /* A cursor that failed to open is not fatal: the error is cleared
         and the result set is emitted without hits. */
      if (!tc) { ERRCLR(ctx); }
#ifdef HAVE_MESSAGE_PACK
      resultset_size = 1; /* [NHITS, (COLUMNS), (HITS)] */
      if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
        resultset_size++;
      }
      resultset_size += format->limit;
#endif
      grn_output_array_open(ctx, outbuf, output_type, "RESULTSET", resultset_size);
      grn_output_array_open(ctx, outbuf, output_type, "NHITS", 1);
      if (output_type == GRN_CONTENT_XML) {
        grn_text_itoa(ctx, outbuf, format->nhits);
      } else {
        grn_output_int32(ctx, outbuf, output_type, format->nhits);
      }
      grn_output_array_close(ctx, outbuf, output_type);
      if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
        grn_output_array_open(ctx, outbuf, output_type, "COLUMNS", ncolumns);
        for (j = 0; j < ncolumns; j++) {
          grn_id range_id;
          grn_output_array_open(ctx, outbuf, output_type, "COLUMN", 2);
          GRN_BULK_REWIND(&buf);
          grn_column_name_(ctx, columns[j], &buf);
          grn_output_obj(ctx, outbuf, output_type, &buf, NULL);
          /* column range */
          range_id = grn_obj_get_range(ctx, columns[j]);
          if (range_id == GRN_ID_NIL) {
            GRN_TEXT_PUTS(ctx, outbuf, "null");
          } else {
            int name_len;
            grn_obj *range_obj;
            char name_buf[GRN_TABLE_MAX_KEY_SIZE];

            range_obj = grn_ctx_at(ctx, range_id);
            name_len = grn_obj_name(ctx, range_obj, name_buf,
                                    GRN_TABLE_MAX_KEY_SIZE);
            GRN_BULK_REWIND(&buf);
            GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
            grn_output_obj(ctx, outbuf, output_type, &buf, NULL);
          }
          grn_output_array_close(ctx, outbuf, output_type);
        }
        grn_output_array_close(ctx, outbuf, output_type);
      }
      if (tc) {
        grn_obj id;
        GRN_TEXT_INIT(&id, 0);
        for (i = 0; !grn_table_cursor_next_o(ctx, tc, &id); i++) {
          grn_output_array_open(ctx, outbuf, output_type, "HIT", ncolumns);
          for (j = 0; j < ncolumns; j++) {
            grn_text_atoj_o(ctx, outbuf, output_type, columns[j], &id);
          }
          grn_output_array_close(ctx, outbuf, output_type);
        }
        GRN_OBJ_FIN(ctx, &id);
        grn_table_cursor_close(ctx, tc);
      }
      grn_output_array_close(ctx, outbuf, output_type);
    } else {
      int i;
      grn_obj *column = grn_obj_column(ctx, obj, "_key", 4);
      grn_table_cursor *tc = grn_table_cursor_open(ctx, obj, NULL, 0, NULL, 0,
                                                   0, -1, GRN_CURSOR_ASCENDING);
      grn_output_array_open(ctx, outbuf, output_type, "HIT", -1);
      if (tc) {
        grn_obj id;
        GRN_TEXT_INIT(&id, 0);
        for (i = 0; !grn_table_cursor_next_o(ctx, tc, &id); i++) {
          /* todo:
          grn_text_atoj_o(ctx, outbuf, output_type, column, &id);
          */
          GRN_BULK_REWIND(&buf);
          grn_obj_get_value_o(ctx, column, &id, &buf);
          grn_text_esc(ctx, outbuf, GRN_BULK_HEAD(&buf), GRN_BULK_VSIZE(&buf));
        }
        GRN_OBJ_FIN(ctx, &id);
        grn_table_cursor_close(ctx, tc);
      }
      grn_output_array_close(ctx, outbuf, output_type);
      grn_obj_unlink(ctx, column);
    }
    break;
  }
  GRN_OBJ_FIN(ctx, &buf);
}
Пример #7
0
/*
 * Build two expressions by hand and check the JSON they produce.
 *
 * `cond` compares the "size" value of a `docs` record with 14.
 * `expr` creates a hash-key result table from `docs`, assigns it to a
 * variable, runs GRN_OP_TABLE_SELECT with `cond`, runs GRN_OP_OBJ_SEARCH
 * for "moge" on `index_body`, and finally emits ".size ._score .body"
 * through GRN_OP_JSON_PUT into `text_buf`.
 *
 * Relies on file-level fixtures (context, cond, expr, docs, index_body,
 * text_buf, int_buf, ptr_buf) and on prepare_data().
 */
void
test_select_search(void)
{
  grn_obj *v;

  prepare_data();

  /* cond: <record of docs>, "size", GET_VALUE, 14, EQUAL */
  cut_assert_not_null((cond = grn_expr_create(&context, NULL, 0)));
  v = grn_expr_add_var(&context, cond, NULL, 0);
  GRN_RECORD_INIT(v, 0, grn_obj_id(&context, docs));
  grn_expr_append_obj(&context, cond, v, GRN_OP_PUSH, 1);
  GRN_TEXT_SETS(&context, &text_buf, "size");
  grn_expr_append_const(&context, cond, &text_buf, GRN_OP_PUSH, 1);
  grn_expr_append_op(&context, cond, GRN_OP_GET_VALUE, 2);
  GRN_UINT32_SET(&context, &int_buf, 14);
  grn_expr_append_const(&context, cond, &int_buf, GRN_OP_PUSH, 1);
  grn_expr_append_op(&context, cond, GRN_OP_EQUAL, 2);
  grn_expr_compile(&context, cond);

  cut_assert_not_null((expr = grn_expr_create(&context, NULL, 0)));

  /* from here on `v` is the variable of `expr`, not of `cond` */
  v = grn_expr_add_var(&context, expr, NULL, 0);

  grn_expr_append_obj(&context, expr, v, GRN_OP_PUSH, 1);

  /* TABLE_CREATE operands: empty name, table flags, docs, NULL pointer */
  GRN_BULK_REWIND(&text_buf);
  grn_expr_append_const(&context, expr, &text_buf, GRN_OP_PUSH, 1);
  GRN_UINT32_SET(&context, &int_buf, GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC);
  grn_expr_append_const(&context, expr, &int_buf, GRN_OP_PUSH, 1);
  grn_expr_append_obj(&context, expr, docs, GRN_OP_PUSH, 1);
  GRN_PTR_SET(&context, &ptr_buf, NULL);
  grn_expr_append_obj(&context, expr, &ptr_buf, GRN_OP_PUSH, 1);
  grn_expr_append_op(&context, expr, GRN_OP_TABLE_CREATE, 4);

  /* pairs the variable pushed above with the table just created */
  grn_expr_append_op(&context, expr, GRN_OP_ASSIGN, 2);

  /* TABLE_SELECT operands: docs, cond, v, GRN_OP_OR */
  grn_expr_append_obj(&context, expr, docs, GRN_OP_PUSH, 1);
  grn_expr_append_obj(&context, expr, cond, GRN_OP_PUSH, 1);
  grn_expr_append_obj(&context, expr, v, GRN_OP_PUSH, 1);
  GRN_UINT32_SET(&context, &int_buf, GRN_OP_OR);
  grn_expr_append_const(&context, expr, &int_buf, GRN_OP_PUSH, 1);
  grn_expr_append_op(&context, expr, GRN_OP_TABLE_SELECT, 4);

  /* OBJ_SEARCH operands: index_body, "moge", v, GRN_OP_AND */
  grn_expr_append_obj(&context, expr, index_body, GRN_OP_PUSH, 1);
  GRN_TEXT_SETS(&context, &text_buf, "moge");
  grn_expr_append_const(&context, expr, &text_buf, GRN_OP_PUSH, 1);
  grn_expr_append_obj(&context, expr, v, GRN_OP_PUSH, 1);
  GRN_UINT32_SET(&context, &int_buf, GRN_OP_AND);
  grn_expr_append_const(&context, expr, &int_buf, GRN_OP_PUSH, 1);
  grn_expr_append_op(&context, expr, GRN_OP_OBJ_SEARCH, 4);

  /* JSON_PUT operands: v, the output column list, and text_buf itself,
     which is rewound first and inspected by the assertion below */
  grn_expr_append_obj(&context, expr, v, GRN_OP_PUSH, 1);
  GRN_TEXT_SETS(&context, &text_buf, ".size ._score .body");
  grn_expr_append_const(&context, expr, &text_buf, GRN_OP_PUSH, 1);
  GRN_BULK_REWIND(&text_buf);
  grn_expr_append_obj(&context, expr, &text_buf, GRN_OP_PUSH, 1);
  grn_expr_append_op(&context, expr, GRN_OP_JSON_PUT, 3);

  grn_expr_exec(&context, expr, 0);

  cut_assert_equal_substring("[[2],[14,4,\"moge moge moge\"],[14,2,\"moge hoge hoge\"]]",
                             GRN_TEXT_VALUE(&text_buf), GRN_TEXT_LEN(&text_buf));
}
Пример #8
0
static grn_rc
sequential_fuzzy_search(grn_ctx *ctx, grn_obj *table, grn_obj *column, grn_obj *query,
                        uint32_t max_distance, uint32_t prefix_match_size,
                        uint32_t max_expansion, int flags, grn_obj *res, grn_operator op)
{
  grn_table_cursor *tc;
  char *sx = GRN_TEXT_VALUE(query);
  char *ex = GRN_BULK_CURR(query);

  if (op == GRN_OP_AND) {
    tc = grn_table_cursor_open(ctx, res, NULL, 0, NULL, 0, 0, -1, GRN_CURSOR_BY_ID);
  } else {
    tc = grn_table_cursor_open(ctx, table, NULL, 0, NULL, 0, 0, -1, GRN_CURSOR_BY_ID);
  }
  if (tc) {
    grn_id id;
    grn_obj value;
    score_heap *heap;
    int i, n;
    GRN_TEXT_INIT(&value, 0);

    heap = score_heap_open(ctx, SCORE_HEAP_SIZE);
    if (!heap) {
      grn_table_cursor_close(ctx, tc);
      grn_obj_unlink(ctx, &value);
      return GRN_NO_MEMORY_AVAILABLE;
    }

    while ((id = grn_table_cursor_next(ctx, tc))) {
      unsigned int distance = 0;
      grn_obj *domain;
      GRN_BULK_REWIND(&value);
      grn_obj_get_value(ctx, column, id, &value);
      domain = grn_ctx_at(ctx, ((&value))->header.domain);
      if ((&(value))->header.type == GRN_VECTOR) {
        n = grn_vector_size(ctx, &value);
        for (i = 0; i < n; i++) {
          unsigned int length;
          const char *vector_value = NULL;
          length = grn_vector_get_element(ctx, &value, i, &vector_value, NULL, NULL);

          if (!prefix_match_size ||
              (prefix_match_size > 0 && length >= prefix_match_size &&
               !memcmp(sx, vector_value, prefix_match_size))) {
            distance = calc_edit_distance(ctx, sx, ex,
                                          (char *)vector_value,
                                          (char *)vector_value + length, flags);
            if (distance <= max_distance) {
              score_heap_push(ctx, heap, id, distance);
              break;
            }
          }
        }
      } else if ((&(value))->header.type == GRN_UVECTOR &&
                  grn_obj_is_table(ctx, domain)) {
        n = grn_vector_size(ctx, &value);
        for (i = 0; i < n; i++) {
          grn_id rid;
          char key_name[GRN_TABLE_MAX_KEY_SIZE];
          int key_length;
          rid = grn_uvector_get_element(ctx, &value, i, NULL);
          key_length = grn_table_get_key(ctx, domain, rid, key_name, GRN_TABLE_MAX_KEY_SIZE);

          if (!prefix_match_size ||
              (prefix_match_size > 0 && key_length >= prefix_match_size &&
               !memcmp(sx, key_name, prefix_match_size))) {
            distance = calc_edit_distance(ctx, sx, ex,
                                          key_name, key_name + key_length, flags);
            if (distance <= max_distance) {
              score_heap_push(ctx, heap, id, distance);
              break;
            }
          }
        }
      } else {
        if (grn_obj_is_reference_column(ctx, column)) {
          grn_id rid;
          char key_name[GRN_TABLE_MAX_KEY_SIZE];
          int key_length;
          rid = GRN_RECORD_VALUE(&value);
          key_length = grn_table_get_key(ctx, domain, rid, key_name, GRN_TABLE_MAX_KEY_SIZE);
          if (!prefix_match_size ||
              (prefix_match_size > 0 && key_length >= prefix_match_size &&
               !memcmp(sx, key_name, prefix_match_size))) {
            distance = calc_edit_distance(ctx, sx, ex,
                                          key_name, key_name + key_length, flags);
            if (distance <= max_distance) {
              score_heap_push(ctx, heap, id, distance);
            }
          }
        } else {
          if (!prefix_match_size ||
              (prefix_match_size > 0 && GRN_TEXT_LEN(&value) >= prefix_match_size &&
               !memcmp(sx, GRN_TEXT_VALUE(&value), prefix_match_size))) {
            distance = calc_edit_distance(ctx, sx, ex,
                                          GRN_TEXT_VALUE(&value),
                                          GRN_BULK_CURR(&value), flags);
            if (distance <= max_distance) {
              score_heap_push(ctx, heap, id, distance);
            }
          }
        }
      }
      grn_obj_unlink(ctx, domain);
    }
    grn_table_cursor_close(ctx, tc);
    grn_obj_unlink(ctx, &value);

    for (i = 0; i < heap->n_entries; i++) {
      if (max_expansion > 0 && i >= max_expansion) {
        break;
      }
      {
        grn_posting posting;
        posting.rid = heap->nodes[i].id;
        posting.sid = 1;
        posting.pos = 0;
        posting.weight = max_distance - heap->nodes[i].score;
        grn_ii_posting_add(ctx, &posting, (grn_hash *)res, op);
      }
    }
    grn_ii_resolve_sel_and(ctx, (grn_hash *)res, op);
    score_heap_close(ctx, heap);
  }

  return GRN_SUCCESS;
}
Пример #9
0
/*
 * "tokenize" command: tokenizes the `string` argument with a temporary
 * lexicon built from the `tokenizer`, `normalizer` and `token_filters`
 * arguments and outputs the resulting tokens.
 *
 * `mode` may be empty or "ADD" (tokenize in add mode) or "GET" (tokenize in
 * add mode first, discard those tokens, then tokenize in get mode).  Any
 * other mode is reported as GRN_INVALID_ARGUMENT, as are a missing
 * tokenizer name or a missing string.
 *
 * Always returns NULL; errors are reported through ctx.
 */
static grn_obj *
command_tokenize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_raw_string tokenizer_raw;
  grn_raw_string string_raw;
  grn_raw_string normalizer_raw;
  grn_raw_string flags_raw;
  grn_raw_string mode_raw;
  grn_raw_string token_filters_raw;
  unsigned int tokenize_flags;
  grn_obj *lexicon;
  grn_obj tokens;
  grn_bool mode_is_valid = GRN_TRUE;
  grn_bool mode_is_get = GRN_FALSE;

  /* Fetch every command argument as a raw (pointer, length) string. */
  tokenizer_raw.value =
    grn_plugin_proc_get_var_string(ctx, user_data,
                                   "tokenizer", strlen("tokenizer"),
                                   &(tokenizer_raw.length));
  string_raw.value =
    grn_plugin_proc_get_var_string(ctx, user_data,
                                   "string", strlen("string"),
                                   &(string_raw.length));
  normalizer_raw.value =
    grn_plugin_proc_get_var_string(ctx, user_data,
                                   "normalizer", strlen("normalizer"),
                                   &(normalizer_raw.length));
  flags_raw.value =
    grn_plugin_proc_get_var_string(ctx, user_data,
                                   "flags", strlen("flags"),
                                   &(flags_raw.length));
  mode_raw.value =
    grn_plugin_proc_get_var_string(ctx, user_data,
                                   "mode", strlen("mode"),
                                   &(mode_raw.length));
  token_filters_raw.value =
    grn_plugin_proc_get_var_string(ctx, user_data,
                                   "token_filters", strlen("token_filters"),
                                   &(token_filters_raw.length));

  /* Both the tokenizer name and the text to tokenize are mandatory. */
  if (tokenizer_raw.length == 0) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "[tokenize] tokenizer name is missing");
    return NULL;
  }
  if (string_raw.length == 0) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "[tokenize] string is missing");
    return NULL;
  }

  tokenize_flags = parse_tokenize_flags(ctx, &flags_raw);
  if (ctx->rc != GRN_SUCCESS) {
    return NULL;
  }

  lexicon = create_lexicon_for_tokenize(ctx,
                                        &tokenizer_raw,
                                        &normalizer_raw,
                                        &token_filters_raw);
  if (!lexicon) {
    return NULL;
  }

  GRN_VALUE_FIX_SIZE_INIT(&tokens, GRN_OBJ_VECTOR, GRN_ID_NIL);

  /* Classify the requested mode; an empty mode means ADD. */
  if (mode_raw.length == 0) {
    mode_is_get = GRN_FALSE;
  } else if (GRN_RAW_STRING_EQUAL_CSTRING(mode_raw, "ADD")) {
    mode_is_get = GRN_FALSE;
  } else if (GRN_RAW_STRING_EQUAL_CSTRING(mode_raw, "GET")) {
    mode_is_get = GRN_TRUE;
  } else {
    mode_is_valid = GRN_FALSE;
  }

  if (mode_is_valid) {
    /* Both modes start with an ADD pass over the string. */
    tokenize(ctx, lexicon, &string_raw, GRN_TOKEN_ADD, tokenize_flags, &tokens);
    if (mode_is_get) {
      /* Drop the ADD-pass tokens and report the GET-pass tokens instead. */
      GRN_BULK_REWIND(&tokens);
      tokenize(ctx, lexicon, &string_raw, GRN_TOKEN_GET, tokenize_flags, &tokens);
    }
    output_tokens(ctx, &tokens, lexicon, NULL);
  } else {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "[tokenize] invalid mode: <%.*s>",
                     (int)mode_raw.length,
                     mode_raw.value);
  }
  GRN_OBJ_FIN(ctx, &tokens);

  /* NOTE(review): no MODE_NAME_EQUAL definition is visible in this
   * function; the #undef is kept in case the macro is defined earlier in
   * the file. */
#undef MODE_NAME_EQUAL

  grn_obj_unlink(ctx, lexicon);

  return NULL;
}
Пример #10
0
/*
 * Data-driven test for grn_table_setoperation().
 *
 * Loads three records ("a", "b", "c") into Entries, selects "_id < 3" into
 * result1 and "_id > 1" into result2, applies the set operation named by
 * the "operator" test datum with result1 as both left operand and
 * destination, and then renders result1 as a "[[key, score], ...]" string.
 */
void
test_setoperation(gconstpointer data)
{
  grn_operator operator;
  grn_obj *entries;
  grn_obj *result1;
  grn_obj *result2;
  const char *dump;

  operator = gcut_data_get_int(data, "operator");

  assert_send_command("table_create Entries TABLE_HASH_KEY ShortText");
  send_command(
    "load "
    "--table Entries "
    "--values '[{\"_key\": \"a\"}, {\"_key\": \"b\"}, {\"_key\": \"c\"}]'");

  entries = grn_ctx_get(context, "Entries", -1);
  /* result1: records matching "_id < 3" */
  {
    const char *condition = "_id < 3";
    grn_obj *expr;
    grn_obj *variable;

    GRN_EXPR_CREATE_FOR_QUERY(context, entries, expr, variable);
    grn_expr_parse(context, expr,
                   condition, strlen(condition),
                   NULL, GRN_OP_AND, GRN_OP_MATCH, GRN_EXPR_SYNTAX_SCRIPT);
    result1 = grn_table_select(context, entries, expr, NULL, GRN_OP_OR);
    grn_obj_unlink(context, expr);
  }
  /* result2: records matching "_id > 1" */
  {
    const char *condition = "_id > 1";
    grn_obj *expr;
    grn_obj *variable;

    GRN_EXPR_CREATE_FOR_QUERY(context, entries, expr, variable);
    grn_expr_parse(context, expr,
                   condition, strlen(condition),
                   NULL, GRN_OP_AND, GRN_OP_MATCH, GRN_EXPR_SYNTAX_SCRIPT);
    result2 = grn_table_select(context, entries, expr, NULL, GRN_OP_OR);
    grn_obj_unlink(context, expr);
  }

  /* result1 is both the left operand and the destination table. */
  grn_table_setoperation(context, result1, result2, result1, operator);

  /* Render every record left in result1 as "[key, score]". */
  {
    grn_bool first_record = GRN_TRUE;
    grn_obj buffer;
    grn_obj *score_accessor;
    grn_obj score;

    GRN_TEXT_INIT(&buffer, 0);
    GRN_TEXT_PUTS(context, &buffer, "[");
    score_accessor = grn_obj_column(context, result1,
                                    GRN_COLUMN_NAME_SCORE,
                                    GRN_COLUMN_NAME_SCORE_LEN);
    GRN_FLOAT_INIT(&score, 0);
    GRN_TABLE_EACH_BEGIN(context, result1, cursor, id) {
      void *result_key;
      grn_id entry_id;
      char entry_key[GRN_TABLE_MAX_KEY_SIZE];
      int entry_key_size;

      /* Separate records with ", " (nothing before the first one). */
      if (first_record) {
        first_record = GRN_FALSE;
      } else {
        GRN_TEXT_PUTS(context, &buffer, ", ");
      }

      GRN_TEXT_PUTS(context, &buffer, "[");

      /* The key of a result record is read as the ID of the matching
       * Entries record; that ID is resolved to the entry's own key. */
      grn_table_cursor_get_key(context, cursor, &result_key);
      entry_id = *((grn_id *)result_key);
      entry_key_size = grn_table_get_key(context,
                                         entries,
                                         entry_id,
                                         entry_key,
                                         GRN_TABLE_MAX_KEY_SIZE);
      GRN_TEXT_PUT(context, &buffer, entry_key, entry_key_size);

      GRN_TEXT_PUTS(context, &buffer, ", ");

      /* Score is printed with one digit after the decimal point. */
      GRN_BULK_REWIND(&score);
      grn_obj_get_value(context, score_accessor, id, &score);
      grn_text_printf(context, &buffer, "%.1f", GRN_FLOAT_VALUE(&score));

      GRN_TEXT_PUTS(context, &buffer, "]");
    } GRN_TABLE_EACH_END(context, cursor);
    GRN_OBJ_FIN(context, &score);
    grn_obj_unlink(context, score_accessor);
    GRN_TEXT_PUTS(context, &buffer, "]");

    /* Copy the rendered text out of the bulk before the bulk is
     * finalized. */
    dump = cut_take_strndup(GRN_TEXT_VALUE(&buffer), GRN_TEXT_LEN(&buffer));
    GRN_OBJ_FIN(context, &buffer);
  }
Пример #11
0
/*
 * Data-driven test for grn_table_truncate() on a named, persistent table.
 *
 * The "flags" test datum selects the table type.  A record with one
 * ShortText column value is stored and read back, the table is truncated,
 * and the test then checks that the column reads back as an empty string
 * and that the table holds no records.
 */
void
test_truncate_named(gconstpointer data)
{
  const gchar *table_name = "SearchEngines";
  const gchar *column_name = "description";
  const gchar *column_value = "An open-source fulltext search engine";
  grn_obj_flags flags = gcut_data_get_int(data, "flags");
  /* Defaults describe a key-less (array) table. */
  const gchar *key = NULL;
  unsigned key_size = 0;
  grn_obj *key_type = NULL;
  grn_obj *column_type;
  grn_id record_id;
  int added;

  /* Any keyed table type gets a ShortText key "groonga". */
  if ((flags & GRN_OBJ_TABLE_TYPE_MASK) != GRN_OBJ_TABLE_NO_KEY) {
    key = "groonga";
    key_size = strlen(key);
    key_type = grn_ctx_at(context, GRN_DB_SHORT_TEXT);
  }

  table = grn_table_create(context,
                           table_name, strlen(table_name),
                           NULL,
                           flags | GRN_OBJ_PERSISTENT,
                           key_type,
                           NULL);
  if (key_type) {
    grn_obj_unlink(context, key_type);
  }
  grn_test_assert_context(context);

  column_type = grn_ctx_at(context, GRN_DB_SHORT_TEXT);
  column = grn_column_create(context, table,
                             column_name, strlen(column_name),
                             NULL,
                             GRN_OBJ_COLUMN_SCALAR | GRN_OBJ_PERSISTENT,
                             column_type);
  grn_obj_unlink(context, column_type);
  grn_test_assert_context(context);

  /* Add a single record and store the column value. */
  record_id = grn_table_add(context, table, key, key_size, &added);
  grn_test_assert_not_nil(record_id);
  cut_assert_true(added);

  grn_obj_reinit(context, &buffer, GRN_DB_SHORT_TEXT, 0);
  GRN_TEXT_PUTS(context, &buffer, column_value);
  grn_test_assert(grn_obj_set_value(context, column, record_id,
                                    &buffer, GRN_OBJ_SET));

  /* Before truncation: value reads back and the table has one record.
   * A NUL byte is appended so the bulk can be compared as a C string. */
  GRN_BULK_REWIND(&buffer);
  grn_obj_get_value(context, column, record_id, &buffer);
  GRN_TEXT_PUTC(context, &buffer, '\0');
  cut_assert_equal_string(column_value, GRN_TEXT_VALUE(&buffer));
  cut_assert_equal_uint(1, grn_table_size(context, table));

  grn_test_assert(grn_table_truncate(context, table));

  /* After truncation: value is empty and the table has no records. */
  GRN_BULK_REWIND(&buffer);
  grn_obj_get_value(context, column, record_id, &buffer);
  GRN_TEXT_PUTC(context, &buffer, '\0');
  cut_assert_equal_string("", GRN_TEXT_VALUE(&buffer));
  cut_assert_equal_uint(0, grn_table_size(context, table));
}
Пример #12
0
/*
 * "next" callback of the regexp tokenizer: pushes exactly one token onto
 * tokenizer->token per call and advances the tokenizer state kept in
 * user_data->ptr.
 *
 * Tokens are built from up to ngram_unit (2) characters starting at
 * tokenizer->next; the start position advances by one character per call.
 * A token is cut short at a blank character (when character types are
 * available) and, in GRN_TOKEN_GET mode, before a trailing end mark.
 *
 * In modes other than GRN_TOKEN_GET the first call emits the begin mark
 * and, once the input is exhausted, a final call emits the end mark.  In
 * GRN_TOKEN_GET mode begin/end marks are only emitted when they are present
 * in the input itself.
 *
 * nargs and args are unused.  Always returns NULL.
 */
static grn_obj *
regexp_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  int char_len;
  grn_token_status status = 0;
  grn_regexp_tokenizer *tokenizer = user_data->ptr;
  unsigned int n_characters = 0;
  int ngram_unit = 2;
  grn_obj *buffer = &(tokenizer->buffer);
  const char *current = tokenizer->next;
  const char *end = tokenizer->end;
  const uint_least8_t *char_types = tokenizer->char_types;
  grn_tokenize_mode mode = tokenizer->query->tokenize_mode;
  /* Snapshot the per-call flags; they are reset below and may be set again
   * for the following call. */
  grn_bool is_begin = tokenizer->is_begin;
  grn_bool is_start_token = tokenizer->is_start_token;
  grn_bool break_by_blank = GRN_FALSE;
  grn_bool break_by_end_mark = GRN_FALSE;

  GRN_BULK_REWIND(buffer);
  tokenizer->is_begin = GRN_FALSE;
  tokenizer->is_start_token = GRN_FALSE;

  /* Point at the type of the character the token starts with. */
  if (char_types) {
    char_types += tokenizer->nth_char;
  }

  if (mode != GRN_TOKEN_GET) {
    /* First call: emit the begin mark without consuming input. */
    if (is_begin) {
      grn_tokenizer_token_push(ctx,
                               &(tokenizer->token),
                               GRN_TOKENIZER_BEGIN_MARK_UTF8,
                               GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN,
                               status);
      return NULL;
    }

    /* Input exhausted on a previous call: emit the end mark as last token. */
    if (tokenizer->is_end) {
      status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
      grn_tokenizer_token_push(ctx,
                               &(tokenizer->token),
                               GRN_TOKENIZER_END_MARK_UTF8,
                               GRN_TOKENIZER_END_MARK_UTF8_LEN,
                               status);
      return NULL;
    }
    /* The previous token ended at a blank: when the preceding character is
     * blank, emit an empty token flagged GRN_TOKEN_SKIP. */
    if (is_start_token) {
      if (char_types && GRN_STR_ISBLANK(char_types[-1])) {
        status |= GRN_TOKEN_SKIP;
        grn_tokenizer_token_push(ctx, &(tokenizer->token), "", 0, status);
        return NULL;
      }
    }
  }

  char_len = grn_charlen_(ctx, current, end, tokenizer->query->encoding);
  if (char_len == 0) {
    /* No further character can be read: finish with an empty last token. */
    status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
    grn_tokenizer_token_push(ctx, &(tokenizer->token), "", 0, status);
    return NULL;
  }

  if (mode == GRN_TOKEN_GET) {
    /* A begin mark at the very start of the query is consumed and emitted
     * as a token of its own. */
    if (is_begin &&
        char_len == GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN &&
        memcmp(current, GRN_TOKENIZER_BEGIN_MARK_UTF8, char_len) == 0) {
      n_characters++;
      GRN_TEXT_PUT(ctx, buffer, current, char_len);
      current += char_len;
      tokenizer->next = current;
      tokenizer->nth_char++;
      if (current == end) {
        status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
      }
      grn_tokenizer_token_push(ctx,
                               &(tokenizer->token),
                               GRN_TOKENIZER_BEGIN_MARK_UTF8,
                               GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN,
                               status);
      return NULL;
    }

    /* An end mark that is the final character of the query is emitted as
     * the last token. */
    if (current + char_len == end &&
        char_len == GRN_TOKENIZER_END_MARK_UTF8_LEN &&
        memcmp(current, GRN_TOKENIZER_END_MARK_UTF8, char_len) == 0) {
      status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
      grn_tokenizer_token_push(ctx,
                               &(tokenizer->token),
                               GRN_TOKENIZER_END_MARK_UTF8,
                               GRN_TOKENIZER_END_MARK_UTF8_LEN,
                               status);
      return NULL;
    }
  }

  /* Collect up to ngram_unit characters into buffer. */
  while (GRN_TRUE) {
    n_characters++;
    GRN_TEXT_PUT(ctx, buffer, current, char_len);
    current += char_len;
    /* The next call starts one character after this token's start. */
    if (n_characters == 1) {
      tokenizer->next = current;
      tokenizer->nth_char++;
    }

    if (char_types) {
      uint_least8_t char_type;
      char_type = char_types[0];
      char_types++;
      if (GRN_STR_ISBLANK(char_type)) {
        break_by_blank = GRN_TRUE;
      }
    }

    char_len = grn_charlen_(ctx, (const char *)current, (const char *)end,
                            tokenizer->query->encoding);
    if (char_len == 0) {
      break;
    }

    /* In GET mode a trailing end mark is not merged into this token. */
    if (mode == GRN_TOKEN_GET &&
        current + char_len == end &&
        char_len == GRN_TOKENIZER_END_MARK_UTF8_LEN &&
        memcmp(current, GRN_TOKENIZER_END_MARK_UTF8, char_len) == 0) {
      break_by_end_mark = GRN_TRUE;
    }

    if (break_by_blank || break_by_end_mark) {
      break;
    }

    if (n_characters == ngram_unit) {
      break;
    }
  }

  /* OVERLAP reflects whether the previous token spanned more than one
   * character; the flag for the next call is updated afterwards. */
  if (tokenizer->is_overlapping) {
    status |= GRN_TOKEN_OVERLAP;
  }
  if (n_characters < ngram_unit) {
    status |= GRN_TOKEN_UNMATURED;
  }
  tokenizer->is_overlapping = (n_characters > 1);

  if (mode == GRN_TOKEN_GET) {
    if (current == end) {
      tokenizer->is_end = GRN_TRUE;
      status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
      if (status & GRN_TOKEN_UNMATURED) {
        status |= GRN_TOKEN_FORCE_PREFIX;
      }
    } else {
      if (break_by_blank) {
        tokenizer->get.n_skip_tokens = 0;
        tokenizer->is_start_token = GRN_TRUE;
      } else if (break_by_end_mark) {
        if (!is_start_token && (status & GRN_TOKEN_UNMATURED)) {
          status |= GRN_TOKEN_SKIP;
        }
      } else if (tokenizer->get.n_skip_tokens > 0) {
        /* Still inside the span covered by the last non-skipped token. */
        tokenizer->get.n_skip_tokens--;
        status |= GRN_TOKEN_SKIP;
      } else {
        /* This token is kept; the next ngram_unit - 1 tokens are skipped. */
        tokenizer->get.n_skip_tokens = ngram_unit - 1;
      }
    }
  } else {
    if (tokenizer->next == end) {
      tokenizer->is_end = GRN_TRUE;
    }
    if (break_by_blank) {
      tokenizer->is_start_token = GRN_TRUE;
    }
  }

  grn_tokenizer_token_push(ctx,
                           &(tokenizer->token),
                           GRN_TEXT_VALUE(buffer),
                           GRN_TEXT_LEN(buffer),
                           status);

  return NULL;
}
Пример #13
0
void
test_mroonga_index_score(void)
{
    grn_obj *t1,*c1,*lc,*ft;
    grn_obj buff;
    grn_id r1,r2,r3,r4;

    remove_tmp_directory();
    g_mkdir_with_parents(tmp_directory,0700);
    g_chdir(tmp_directory);
    g_mkdir_with_parents("mrn",0700);

    db = grn_db_create(context,"mroonga.grn",NULL);
    cut_assert_not_null(db);

    /* actual table */
    t1 = grn_table_create(context,"t1",2,"mrn/t1.grn",
                          GRN_OBJ_TABLE_NO_KEY|GRN_OBJ_PERSISTENT,NULL,0);
    cut_assert_not_null(t1);

    /* lexicon table */
    lc = grn_table_create(context,"lc",2,"mrn/lc.grn",
                          GRN_OBJ_TABLE_PAT_KEY|GRN_OBJ_PERSISTENT,
                          grn_ctx_at(context, GRN_DB_SHORT_TEXT), 0);
    cut_assert_not_null(lc);
    grn_test_assert(grn_obj_set_info(context, lc, GRN_INFO_DEFAULT_TOKENIZER,
                                     grn_ctx_at(context, GRN_DB_BIGRAM)));

    /* actual column */
    c1 = grn_column_create(context,t1,"c1",2,"mrn/t1.c1.grn",
                           GRN_OBJ_COLUMN_SCALAR|GRN_OBJ_PERSISTENT,
                           grn_ctx_at(context, GRN_DB_TEXT));
    cut_assert_not_null(c1);

    /* fulltext index */
    ft = grn_column_create(context,lc,"ft",2,"mrn/lc.ft.grn",
                           GRN_OBJ_COLUMN_INDEX|GRN_OBJ_PERSISTENT,t1);
    cut_assert_not_null(ft);

    GRN_TEXT_INIT(&buff,0);

    /* link between actual column and fulltext index */
    GRN_UINT32_SET(context, &buff, grn_obj_id(context, c1));
    grn_obj_set_info(context, ft, GRN_INFO_SOURCE, &buff); /* need to use grn_id */

    /* insert row */
    r1 = grn_table_add(context, t1, NULL, 0, NULL);
    cut_assert_equal_int(1,r1);
    GRN_TEXT_SETS(context, &buff, "abcde");
    grn_test_assert(grn_obj_set_value(context, c1, r1, &buff, GRN_OBJ_SET));

    r2 = grn_table_add(context, t1, NULL, 0, NULL);
    cut_assert_equal_int(2,r2);
    GRN_TEXT_SETS(context, &buff, "fghij");
    grn_test_assert(grn_obj_set_value(context, c1, r2, &buff, GRN_OBJ_SET));

    r3 = grn_table_add(context, t1, NULL, 0, NULL);
    cut_assert_equal_int(3,r3);
    GRN_TEXT_SETS(context, &buff, "11 22 33");
    grn_test_assert(grn_obj_set_value(context, c1, r3, &buff, GRN_OBJ_SET));

    r4 = grn_table_add(context, t1, NULL, 0, NULL);
    cut_assert_equal_int(4,r4);
    GRN_TEXT_SETS(context, &buff, "44 22 55");
    grn_test_assert(grn_obj_set_value(context, c1, r4, &buff, GRN_OBJ_SET));

    /* confirm record are inserted in both column and index */
    cut_assert_equal_int(4,grn_table_size(context,t1));
    cut_assert_equal_int(23,grn_table_size(context,lc));

    /* nlq search */
    {
        grn_id id, docid;
        grn_obj *res;
        grn_table_cursor *tc;
        grn_obj score, *score_column;
        res = grn_table_create(context, NULL, 0, NULL,
                               GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, t1, 0);
        GRN_UINT32_INIT(&score, 0);
        GRN_BULK_REWIND(&buff);
        GRN_TEXT_SETS(context, &buff, "hij");
        grn_obj_search(context, ft, &buff, res, GRN_OP_OR, NULL);
        cut_assert_equal_int(1, grn_table_size(context, res));
        score_column = grn_obj_column(context, res, ".:score", 7);
        tc = grn_table_cursor_open(context, res, NULL, 0, NULL, 0, 0, 0, 0);
        while ((id = grn_table_cursor_next(context, tc))) {
            GRN_BULK_REWIND(&buff);
            grn_table_get_key(context, res, id, &docid, sizeof(grn_id));
            cut_assert_equal_int(2, docid);
            cut_assert_not_null(grn_obj_get_value(context, c1, docid, &buff));
            cut_assert_equal_int(5 ,GRN_TEXT_LEN(&buff));
            cut_assert_equal_substring("fghij", (char*) GRN_BULK_HEAD(&buff),GRN_TEXT_LEN(&buff));
            grn_obj_get_value(context, score_column, id, &score);
            cut_assert_equal_uint(1, GRN_UINT32_VALUE(&score));
        }
        grn_table_cursor_close(context, tc);
        grn_obj_close(context, score_column);
        grn_obj_close(context, res);
    }

    /* boolean search */
    {
        grn_id id, docid;
        grn_obj *res;
        grn_query *query;
        grn_table_cursor *tc;
        grn_obj score, *score_column;
        const char *qstr = "+22 -55";
        res = grn_table_create(context, NULL, 0, NULL,
                               GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, t1, 0);
        GRN_UINT32_INIT(&score, 0);
        query = grn_query_open(context, qstr, strlen(qstr), GRN_OP_OR, 32);
        grn_obj_search(context, ft, (grn_obj*) query, res, GRN_OP_OR, NULL);
        cut_assert_equal_int(1, grn_table_size(context, res));
        score_column = grn_obj_column(context, res, ".:score", 7);
        tc = grn_table_cursor_open(context, res, NULL, 0, NULL, 0, 0, 0, 0);
        while ((id = grn_table_cursor_next(context, tc))) {
            GRN_BULK_REWIND(&buff);
            grn_table_get_key(context, res, id, &docid, sizeof(grn_id));
            cut_assert_equal_int(3, docid);
            cut_assert_not_null(grn_obj_get_value(context, c1, docid, &buff));
            cut_assert_equal_int(8 ,GRN_TEXT_LEN(&buff));
            cut_assert_equal_substring("11 22 33", (char*) GRN_BULK_HEAD(&buff),GRN_TEXT_LEN(&buff));
            grn_obj_get_value(context, score_column, id, &score);
            cut_assert_equal_uint(5, GRN_UINT32_VALUE(&score));
        }
        grn_query_close(context, query);
        grn_table_cursor_close(context ,tc);
        grn_obj_close(context, score_column);
        grn_obj_close(context, res);
    }

    grn_obj_close(context, &buff);
    grn_obj_close(context, ft);
    grn_obj_close(context, c1);
    grn_obj_close(context, lc);
    grn_obj_close(context, t1);
}
Пример #14
0
void
test_mroonga_index_score(void)
{
  grn_obj *t1,*c1,*lc,*ft;
  grn_obj buff;
  grn_id r1,r2,r3,r4;
  const gchar *mrn_dir;

  mrn_dir = cut_build_path(tmp_directory, "mrn", NULL);
  g_mkdir_with_parents(mrn_dir, 0700);

  grn_obj_close(context, db);
  db = grn_db_create(context,
                     cut_build_path(mrn_dir, "mroonga.grn", NULL),
                     NULL);
  cut_assert_not_null(db);

  /* actual table */
  t1 = grn_table_create(context, "t1", 2,
                        cut_build_path(mrn_dir, "t1.grn", NULL),
			GRN_OBJ_TABLE_NO_KEY|GRN_OBJ_PERSISTENT, NULL, 0);
  cut_assert_not_null(t1);

  /* lexicon table */
  lc = grn_table_create(context, "lc", 2,
                        cut_build_path(mrn_dir, "lc.grn", NULL),
			GRN_OBJ_TABLE_PAT_KEY|GRN_OBJ_PERSISTENT,
                        grn_ctx_at(context, GRN_DB_SHORT_TEXT), 0);
  cut_assert_not_null(lc);
  grn_test_assert(grn_obj_set_info(context, lc, GRN_INFO_DEFAULT_TOKENIZER,
				   grn_ctx_at(context, GRN_DB_BIGRAM)));

  /* actual column */
  c1 = grn_column_create(context, t1, "c1", 2,
                         cut_build_path(mrn_dir, "t1.c1.grn", NULL),
			 GRN_OBJ_COLUMN_SCALAR|GRN_OBJ_PERSISTENT,
			 grn_ctx_at(context, GRN_DB_TEXT));
  cut_assert_not_null(c1);

  /* fulltext index */
  ft = grn_column_create(context, lc, "ft", 2,
                         cut_build_path(mrn_dir, "lc.ft.grn", NULL),
			 GRN_OBJ_COLUMN_INDEX|GRN_OBJ_PERSISTENT, t1);
  cut_assert_not_null(ft);

  GRN_TEXT_INIT(&buff,0);

  /* link between actual column and fulltext index */
  GRN_UINT32_SET(context, &buff, grn_obj_id(context, c1));
  grn_obj_set_info(context, ft, GRN_INFO_SOURCE, &buff); /* need to use grn_id */

  /* insert row */
  r1 = grn_table_add(context, t1, NULL, 0, NULL);
  cut_assert_equal_int(1,r1);
  GRN_TEXT_SETS(context, &buff, "abcde");
  grn_test_assert(grn_obj_set_value(context, c1, r1, &buff, GRN_OBJ_SET));

  r2 = grn_table_add(context, t1, NULL, 0, NULL);
  cut_assert_equal_int(2,r2);
  GRN_TEXT_SETS(context, &buff, "fghij");
  grn_test_assert(grn_obj_set_value(context, c1, r2, &buff, GRN_OBJ_SET));

  r3 = grn_table_add(context, t1, NULL, 0, NULL);
  cut_assert_equal_int(3,r3);
  GRN_TEXT_SETS(context, &buff, "11 22 33");
  grn_test_assert(grn_obj_set_value(context, c1, r3, &buff, GRN_OBJ_SET));

  r4 = grn_table_add(context, t1, NULL, 0, NULL);
  cut_assert_equal_int(4,r4);
  GRN_TEXT_SETS(context, &buff, "44 22 55");
  grn_test_assert(grn_obj_set_value(context, c1, r4, &buff, GRN_OBJ_SET));

  /* confirm record are inserted in both column and index */
  cut_assert_equal_int(4,grn_table_size(context,t1));
  cut_assert_equal_int(23,grn_table_size(context,lc));

  /* nlq search */
  {
    grn_id id, docid;
    grn_obj *res;
    grn_table_cursor *tc;
    grn_obj score, *score_column;
    res = grn_table_create(context, NULL, 0, NULL,
                           GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, t1, 0);
    GRN_FLOAT_INIT(&score, 0);
    GRN_BULK_REWIND(&buff);
    GRN_TEXT_SETS(context, &buff, "hij");
    grn_obj_search(context, ft, &buff, res, GRN_OP_OR, NULL);
    cut_assert_equal_int(1, grn_table_size(context, res));
    score_column = grn_obj_column(context, res, "_score", 6);
    tc = grn_table_cursor_open(context, res, NULL, 0, NULL, 0, 0, -1, 0);
    while ((id = grn_table_cursor_next(context, tc))) {
      GRN_BULK_REWIND(&buff);
      grn_table_get_key(context, res, id, &docid, sizeof(grn_id));
      cut_assert_equal_int(2, docid);
      cut_assert_not_null(grn_obj_get_value(context, c1, docid, &buff));
      cut_assert_equal_int(5 ,GRN_TEXT_LEN(&buff));
      cut_assert_equal_substring("fghij", (char*) GRN_BULK_HEAD(&buff),GRN_TEXT_LEN(&buff));
      grn_obj_get_value(context, score_column, id, &score);
      cut_assert_equal_double(1.0, DBL_EPSILON, GRN_FLOAT_VALUE(&score));
    }
    grn_table_cursor_close(context, tc);
    grn_obj_close(context, score_column);
    grn_obj_close(context, res);
  }

  /* boolean search */
  {
    grn_id id, docid;
    grn_obj *res;
    grn_obj *match_columns, *match_columns_variable;
    grn_obj *expression, *expression_variable;
    grn_table_cursor *tc;
    grn_obj score, *score_column;
    const char *match_columns_expression = "c1 * 5";
    const char *qstr = "+22 -55";

    GRN_EXPR_CREATE_FOR_QUERY(context, t1,
                              match_columns, match_columns_variable);
    grn_expr_parse(context, match_columns,
                   match_columns_expression,
                   strlen(match_columns_expression),
                   NULL, GRN_OP_MATCH, GRN_OP_AND,
                   GRN_EXPR_SYNTAX_SCRIPT);
    GRN_EXPR_CREATE_FOR_QUERY(context, t1, expression, expression_variable);
    res = grn_table_create(context, NULL, 0, NULL,
                           GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, t1, 0);
    grn_test_assert(grn_expr_parse(context, expression,
                                   qstr, strlen(qstr),
                                   match_columns,
                                   GRN_OP_MATCH, GRN_OP_OR,
                                   GRN_EXPR_SYNTAX_QUERY));
    grn_table_select(context, t1, expression, res, GRN_OP_OR);
    cut_assert_equal_int(1, grn_table_size(context, res));
    GRN_FLOAT_INIT(&score, 0);
    score_column = grn_obj_column(context, res, "_score", 6);
    tc = grn_table_cursor_open(context, res, NULL, 0, NULL, 0, 0, -1, 0);
    while ((id = grn_table_cursor_next(context, tc))) {
      GRN_BULK_REWIND(&buff);
      grn_table_get_key(context, res, id, &docid, sizeof(grn_id));
      cut_assert_equal_int(3, docid);
      cut_assert_not_null(grn_obj_get_value(context, c1, docid, &buff));
      cut_assert_equal_int(8, GRN_TEXT_LEN(&buff));
      cut_assert_equal_substring("11 22 33", (char*) GRN_BULK_HEAD(&buff),GRN_TEXT_LEN(&buff));
      grn_obj_get_value(context, score_column, id, &score);
      cut_assert_equal_double(5, DBL_EPSILON, GRN_FLOAT_VALUE(&score));
    }
    grn_obj_close(context, expression);
    grn_obj_close(context, match_columns);
    grn_table_cursor_close(context ,tc);
    grn_obj_close(context, score_column);
    grn_obj_close(context, res);
  }

  grn_obj_close(context, &buff);
  grn_obj_close(context, ft);
  grn_obj_close(context, c1);
  grn_obj_close(context, lc);
  grn_obj_close(context, t1);
}
Пример #15
0
void
test_scan_search(void)
{
  grn_obj *cond, *expr, *v, textbuf, intbuf;
  GRN_TEXT_INIT(&textbuf, 0);
  GRN_UINT32_INIT(&intbuf, 0);

  prepare_data(&textbuf, &intbuf);

  cut_assert_not_null((cond = grn_expr_create(&context, NULL, 0)));
  v = grn_expr_add_var(&context, cond, NULL, 0);
  GRN_RECORD_INIT(v, 0, grn_obj_id(&context, docs));
  grn_expr_append_obj(&context, cond, v);
  GRN_TEXT_SETS(&context, &textbuf, "size");
  grn_expr_append_const(&context, cond, &textbuf);
  grn_expr_append_op(&context, cond, GRN_OP_OBJ_GET_VALUE, 2);
  GRN_UINT32_SET(&context, &intbuf, 14);
  grn_expr_append_const(&context, cond, &intbuf);
  grn_expr_append_op(&context, cond, GRN_OP_EQUAL, 2);
  grn_expr_compile(&context, cond);

  cut_assert_not_null((expr = grn_expr_create(&context, NULL, 0)));

  v = grn_expr_add_var(&context, expr, NULL, 0);

  GRN_BULK_REWIND(&textbuf);
  grn_expr_append_const(&context, expr, &textbuf);
  GRN_UINT32_SET(&context, &intbuf, GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC);
  grn_expr_append_const(&context, expr, &intbuf);
  grn_expr_append_obj(&context, expr, docs);
  GRN_UINT32_SET(&context, &intbuf, 0);
  grn_expr_append_const(&context, expr, &intbuf);
  grn_expr_append_op(&context, expr, GRN_OP_TABLE_CREATE, 4);

  grn_expr_append_obj(&context, expr, v);
  grn_expr_append_op(&context, expr, GRN_OP_VAR_SET_VALUE, 2);

  grn_expr_append_obj(&context, expr, docs);
  grn_expr_append_obj(&context, expr, cond);
  grn_expr_append_obj(&context, expr, v);
  GRN_UINT32_SET(&context, &intbuf, GRN_SEL_OR);
  grn_expr_append_const(&context, expr, &intbuf);
  grn_expr_append_op(&context, expr, GRN_OP_TABLE_SCAN, 4);

  grn_expr_append_obj(&context, expr, index_body);
  GRN_TEXT_SETS(&context, &textbuf, "moge");
  grn_expr_append_const(&context, expr, &textbuf);
  grn_expr_append_obj(&context, expr, v);
  GRN_UINT32_SET(&context, &intbuf, GRN_SEL_AND);
  grn_expr_append_const(&context, expr, &intbuf);
  grn_expr_append_op(&context, expr, GRN_OP_OBJ_SEARCH, 4);

  grn_expr_append_obj(&context, expr, v);
  GRN_TEXT_SETS(&context, &textbuf, ".size .:score .body");
  grn_expr_append_const(&context, expr, &textbuf);
  GRN_BULK_REWIND(&textbuf);
  grn_expr_append_obj(&context, expr, &textbuf);
  grn_expr_append_op(&context, expr, GRN_OP_JSON_PUT, 3);

  grn_expr_exec(&context, expr);

  cut_assert_equal_substring("[[14, 4, \"moge moge moge\"], [14, 2, \"moge hoge hoge\"]]",
                             GRN_TEXT_VALUE(&textbuf), GRN_TEXT_LEN(&textbuf));

  grn_test_assert(grn_obj_close(&context, expr));
  grn_test_assert(grn_obj_close(&context, cond));
  grn_test_assert(grn_obj_close(&context, &textbuf));
  grn_test_assert(grn_obj_close(&context, &intbuf));
}
Пример #16
0
/*
 * Hook procedure that replaces tag IDs in a value being stored with the
 * IDs found in the tag table's synonym column.
 *
 * Four objects are popped from the context stack, in this order: flags,
 * the new value, the old value and the record ID (unused here). Nothing
 * is done when the new value is empty or when flags is 0.
 *
 * The tag table is looked up from the old value's domain and must provide
 * the SYNONYM_COLUMN_NAME column. The new value is rewritten in place:
 *   - text value:        tokenized against the tag table into a vector of IDs
 *   - GRN_UVECTOR value: every element is treated as a tag ID
 *   - anything else:     treated as a single record ID
 * Every ID whose synonym column holds a non-zero value is replaced by that
 * value.
 *
 * Always returns NULL; problems are only reported through the plugin log.
 */
static grn_obj *
command_tag_synonym(grn_ctx *ctx, GNUC_UNUSED int nargs, GNUC_UNUSED grn_obj **args,
                    GNUC_UNUSED grn_user_data *user_data)
{
  /* The pop order is significant: it mirrors how the values were pushed. */
  grn_obj *flags = grn_ctx_pop(ctx);
  grn_obj *newvalue = grn_ctx_pop(ctx);
  grn_obj *oldvalue = grn_ctx_pop(ctx);
  GNUC_UNUSED grn_obj *id = grn_ctx_pop(ctx);
  grn_obj record;
  grn_obj value;
  grn_obj *domain;
  grn_obj *table;
  grn_obj *column;
  grn_bool from_text;
  grn_bool from_uvector;

  if (GRN_BULK_VSIZE(newvalue) == 0 || GRN_INT32_VALUE(flags) == 0) {
    return NULL;
  }

  /* A missing table is rejected here instead of being handed to
   * grn_obj_column(). */
  table = grn_ctx_at(ctx, oldvalue->header.domain);
  if (!table || !is_table(table)) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING,
                   "[tag-synonym] "
                   "hooked column must be reference type");
    return NULL;
  }

  column = grn_obj_column(ctx,
                          table,
                          SYNONYM_COLUMN_NAME,
                          SYNONYM_COLUMN_NAME_LEN);
  if (!column) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING,
                   "[tag-synonym] "
                   "couldn't open synonym column");
    return NULL;
  }

  /* Decide once how the new value is interpreted; is_string() is never
   * called with a NULL domain. */
  domain = grn_ctx_at(ctx, newvalue->header.domain);
  from_text = (domain && is_string(domain)) ? GRN_TRUE : GRN_FALSE;
  from_uvector =
    (!from_text && newvalue->header.type == GRN_UVECTOR) ? GRN_TRUE : GRN_FALSE;

  GRN_UINT32_INIT(&value, 0);
  if (from_text || from_uvector) {
    int i, n;

    if (from_text) {
      GRN_RECORD_INIT(&record, GRN_OBJ_VECTOR, oldvalue->header.domain);
      grn_table_tokenize(ctx, table, GRN_TEXT_VALUE(newvalue), GRN_TEXT_LEN(newvalue), &record, GRN_TRUE);
    } else {
      /* Shallow copy: record now refers to the caller's original elements. */
      record = *newvalue;
    }

    /* NOTE(review): re-initializing newvalue drops its previous buffer
     * without releasing it; confirm who owns that memory. */
    GRN_RECORD_INIT(newvalue, GRN_OBJ_VECTOR, oldvalue->header.domain);
    n = grn_vector_size(ctx, &record);
    for (i = 0; i < n; i++) {
      grn_id tid;
      tid = grn_uvector_get_element(ctx, &record, i, NULL);
      GRN_BULK_REWIND(&value);
      grn_obj_get_value(ctx, column, tid, &value);
      if (GRN_UINT32_VALUE(&value)) {
        GRN_PLUGIN_LOG(ctx, GRN_LOG_INFO,
                       "[tag-synonym] "
                       "changed: tid %d -> %d", tid, GRN_UINT32_VALUE(&value));
        tid = GRN_UINT32_VALUE(&value);
      }
      grn_uvector_add_element(ctx, newvalue, tid, 0);
    }

    /* Only the vector created by tokenizing is owned by this function. */
    if (from_text) {
      grn_obj_unlink(ctx, &record);
    }
  } else {
    grn_id tid;
    tid = GRN_RECORD_VALUE(newvalue);
    grn_obj_get_value(ctx, column, tid, &value);
    if (GRN_UINT32_VALUE(&value)) {
      GRN_PLUGIN_LOG(ctx, GRN_LOG_INFO,
                     "[tag-synonym] "
                     "changed: tid %d -> %d", tid, GRN_UINT32_VALUE(&value));
      tid = GRN_UINT32_VALUE(&value);
      GRN_BULK_REWIND(newvalue);
      GRN_RECORD_SET(ctx, newvalue, tid);
    }
  }
  grn_obj_unlink(ctx, &value);
  grn_obj_unlink(ctx, column);

  return NULL;
}
Пример #17
0
/*
 * It updates the value of a variable size column for the record
 * whose ID is _id_.
 *
 * A weight vector column is a special variable size column. This
 * description covers only weight vector columns. Other variable
 * size columns behave as you would expect.
 *
 * @example Use weight vector as matrix search result weight
 *    Groonga::Schema.define do |schema|
 *      schema.create_table("Products",
 *                          :type => :patricia_trie,
 *                          :key_type => "ShortText") do |table|
 *        # This is weight vector.
 *        # ":with_weight => true" is important for matrix search result weight.
 *        table.short_text("tags",
 *                         :type => :vector,
 *                         :with_weight => true)
 *      end
 *
 *      schema.create_table("Tags",
 *                          :type => :hash,
 *                          :key_type => "ShortText") do |table|
 *        # This is inverted index. It also needs ":with_weight => true".
 *        table.index("Products.tags", :with_weight => true)
 *      end
 *    end
 *
 *    products = Groonga["Products"]
 *    groonga = products.add("Groonga")
 *    groonga.tags = [
 *      {
 *        :value  => "groonga",
 *        :weight => 100,
 *      },
 *    ]
 *    rroonga = products.add("Rroonga")
 *    rroonga.tags = [
 *      {
 *        :value  => "ruby",
 *        :weight => 100,
 *      },
 *      {
 *        :value  => "groonga",
 *        :weight => 10,
 *      },
 *    ]
 *
 *    result = products.select do |record|
 *      # Search by "groonga"
 *      record.match("groonga") do |match_target|
 *        match_target.tags
 *      end
 *    end
 *
 *    result.each do |record|
 *      p [record.key.key, record.score]
 *    end
 *    # Matches all records with weight.
 *    # => ["Groonga", 101]
 *    #    ["Rroonga", 11]
 *
 *    # Increases score for "ruby" 10 times
 *    products.select(# The previous search result. Required.
 *                    :result => result,
 *                    # It just adds score to existing records in the result. Required.
 *                    :operator => Groonga::Operator::ADJUST) do |record|
 *      record.match("ruby") do |target|
 *        target.tags * 10 # 10 times
 *      end
 *    end
 *
 *    result.each do |record|
 *      p [record.key.key, record.score]
 *    end
 *    # Weight is used for increasing score.
 *    # => ["Groonga", 101]  <- Not changed.
 *    #    ["Rroonga", 1021] <- 1021 (= 101 * 10 + 1) increased.
 *
 * @overload []=(id, elements)
 *   This description is for weight vector column.
 *
 *   @param [Integer, Record] id The record ID.
 *   @param [Array<Hash<Symbol, String>>] elements An array of values
 *     for weight vector.
 *     Each value is a Hash like the following form:
 *
 *     <pre>
 *     {
 *       :value  => [KEY],
 *       :weight => [WEIGHT],
 *     }
 *     </pre>
 *
 *     @[KEY]@ must be the same type of the key of the table that is
 *     specified as range on creating the weight vector.
 *
 *     @[WEIGHT]@ must be a positive integer. Note that the search
 *     score becomes @weight + 1@. It means that if you want to get
 *     10 as score, you should set 9 as weight.
 *
 * @overload []=(id, value)
 *   This description is for variable size columns except weight
 *   vector column.
 *
 *   @param [Integer, Record] id The record ID.
 *   @param [::Object] value A new value.
 *   @see Groonga::Object#[]=
 *
 * @since 4.0.1
 */
static VALUE
rb_grn_variable_size_column_array_set (VALUE self, VALUE rb_id, VALUE rb_value)
{
    grn_ctx *context = NULL;
    grn_obj *column;
    grn_obj *range;
    grn_obj *value;
    grn_obj *element_value;
    grn_id id;
    grn_rc rc;

    rb_grn_variable_size_column_deconstruct(SELF(self), &column, &context,
                                            NULL, NULL, &value, &element_value,
                                            NULL, &range);

    /* Columns without weight are delegated to the super method. */
    if ((column->header.flags & GRN_OBJ_WITH_WEIGHT) == 0) {
        VALUE super_args[2];
        super_args[0] = rb_id;
        super_args[1] = rb_value;
        return rb_call_super(2, super_args);
    }

    id = RVAL2GRNID(rb_id, context, range, self);

    /* Reset the shared value buffer to an empty weight vector. */
    grn_obj_reinit(context, value,
                   value->header.domain,
                   value->header.flags | GRN_OBJ_VECTOR);
    value->header.flags |= GRN_OBJ_WITH_WEIGHT;

    if (RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cArray))) {
        /* Array form: each entry carries "value" and "weight" options. */
        int n_elements = RARRAY_LEN(rb_value);
        int nth;

        for (nth = 0; nth < n_elements; nth++) {
            VALUE rb_element_value, rb_weight;
            unsigned int weight;

            rb_grn_scan_options(RARRAY_PTR(rb_value)[nth],
                                "value", &rb_element_value,
                                "weight", &rb_weight,
                                NULL);

            /* A missing weight means 0. */
            weight = NIL_P(rb_weight) ? 0 : NUM2UINT(rb_weight);

            if (value->header.type != GRN_UVECTOR) {
                GRN_BULK_REWIND(element_value);
                if (!NIL_P(rb_element_value)) {
                    RVAL2GRNBULK(rb_element_value, context, element_value);
                }

                grn_vector_add_element(context, value,
                                       GRN_BULK_HEAD(element_value),
                                       GRN_BULK_VSIZE(element_value),
                                       weight,
                                       element_value->header.domain);
            } else {
                grn_id element_id =
                    RVAL2GRNID(rb_element_value, context, range, self);
                grn_uvector_add_element(context, value, element_id, weight);
            }
        }
    } else if (RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cHash))) {
        /* Hash form: every pair is converted by the foreach callback. */
        HashElementToVectorElementData data;

        data.self = self;
        data.context = context;
        data.vector = value;
        data.element_value = element_value;
        data.range = range;
        rb_hash_foreach(rb_value, hash_element_to_vector_element, (VALUE)&data);
    } else {
        rb_raise(rb_eArgError,
                 "<%s>: "
                 "weight vector value must be an array of index value or "
                 "a hash that key is vector value and value is vector weight: "
                 "<%s>",
                 rb_grn_inspect(self),
                 rb_grn_inspect(rb_value));
    }

    rc = grn_obj_set_value(context, column, id, value, GRN_OBJ_SET);
    rb_grn_context_check(context, self);
    rb_grn_rc_check(rc, self);

    return rb_value;
}
Пример #18
0
/*
 * "tokenize" command body.
 *
 * Reads the "tokenizer", "string", "normalizer", "flags", "mode" and
 * "token_filters" variables, builds a lexicon from them, tokenizes the
 * string and outputs the resulting tokens.
 *
 * "mode" may be empty or "ADD" (tokenize in GRN_TOKEN_ADD mode) or "GET"
 * (tokenize once in GRN_TOKEN_ADD mode, discard those tokens, then
 * tokenize again in GRN_TOKEN_GET mode). Any other mode, a missing
 * tokenizer name or a missing string raises GRN_INVALID_ARGUMENT.
 *
 * Always returns NULL.
 */
static grn_obj *
command_tokenize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *tokenizer_name =
    grn_plugin_proc_get_var(ctx, user_data, "tokenizer", -1);
  grn_obj *string =
    grn_plugin_proc_get_var(ctx, user_data, "string", -1);
  grn_obj *normalizer_name =
    grn_plugin_proc_get_var(ctx, user_data, "normalizer", -1);
  grn_obj *flag_names =
    grn_plugin_proc_get_var(ctx, user_data, "flags", -1);
  grn_obj *mode_name =
    grn_plugin_proc_get_var(ctx, user_data, "mode", -1);
  grn_obj *token_filter_names =
    grn_plugin_proc_get_var(ctx, user_data, "token_filters", -1);
  unsigned int flags;
  grn_obj *lexicon;
  grn_obj tokens;
  const char *mode;
  size_t mode_length;
  int add_requested;
  int get_requested;

  if (GRN_TEXT_LEN(tokenizer_name) == 0) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "[tokenize] tokenizer name is missing");
    return NULL;
  }

  if (GRN_TEXT_LEN(string) == 0) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "[tokenize] string is missing");
    return NULL;
  }

  flags = parse_tokenize_flags(ctx, flag_names);
  if (ctx->rc != GRN_SUCCESS) {
    return NULL;
  }

  lexicon = create_lexicon_for_tokenize(ctx,
                                        tokenizer_name,
                                        normalizer_name,
                                        token_filter_names);
  if (!lexicon) {
    return NULL;
  }

  /* Classify the requested mode once; an empty mode means "ADD". */
  mode = GRN_TEXT_VALUE(mode_name);
  mode_length = GRN_TEXT_LEN(mode_name);
  add_requested =
    (mode_length == 0) ||
    (mode_length == strlen("ADD") && memcmp(mode, "ADD", strlen("ADD")) == 0);
  get_requested =
    (mode_length == strlen("GET") && memcmp(mode, "GET", strlen("GET")) == 0);

  GRN_VALUE_FIX_SIZE_INIT(&tokens, GRN_OBJ_VECTOR, GRN_ID_NIL);
  if (add_requested) {
    tokenize(ctx, lexicon, string, GRN_TOKEN_ADD, flags, &tokens);
    output_tokens(ctx, &tokens, lexicon, NULL);
  } else if (get_requested) {
    /* The ADD pass runs first; its tokens are discarded before the GET pass. */
    tokenize(ctx, lexicon, string, GRN_TOKEN_ADD, flags, &tokens);
    GRN_BULK_REWIND(&tokens);
    tokenize(ctx, lexicon, string, GRN_TOKEN_GET, flags, &tokens);
    output_tokens(ctx, &tokens, lexicon, NULL);
  } else {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "[tokenize] invalid mode: <%.*s>",
                     (int)mode_length, mode);
  }
  GRN_OBJ_FIN(ctx, &tokens);

  grn_obj_unlink(ctx, lexicon);

  return NULL;
}
Пример #19
0
/*
 * UDF body of mroonga_snippet_html().
 *
 * Runs the snippet object against the first argument and returns the
 * concatenation of all results, each wrapped in
 * <div class="snippet">...</div>. The returned pointer refers to
 * info->result and *length receives its size; `result` is not used.
 *
 * When info->snippet is not set, a snippet object is prepared for this call
 * with mrn_snippet_html_prepare() and closed again before returning.
 *
 * Returns NULL with *is_null = 1 when the first argument is NULL. On any
 * failure returns NULL with both *is_null and *error set to 1.
 */
MRN_API char *mroonga_snippet_html(UDF_INIT *init,
                                   UDF_ARGS *args,
                                   char *result,
                                   unsigned long *length,
                                   char *is_null,
                                   char *error)
{
  MRN_DBUG_ENTER_FUNCTION();

  mrn_snippet_html_info *info =
    reinterpret_cast<mrn_snippet_html_info *>(init->ptr);

  grn_ctx *ctx = info->ctx;
  grn_obj *snippet = info->snippet;
  grn_obj *result_buffer = &(info->result);

  if (!args->args[0]) {
    *is_null = 1;
    DBUG_RETURN(NULL);
  }

  if (!snippet) {
    if (mrn_snippet_html_prepare(info, args, NULL, &snippet)) {
      goto error;
    }
  }

  {
    char *target = args->args[0];
    unsigned int target_length = args->lengths[0];

    unsigned int n_results, max_tagged_length;
    {
      grn_rc rc = grn_snip_exec(ctx, snippet, target, target_length,
                                &n_results, &max_tagged_length);
      if (rc != GRN_SUCCESS) {
        my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM,
                        ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf);
        goto error;
      }
    }

    *is_null = 0;
    GRN_BULK_REWIND(result_buffer);

    {
      const char *start_tag = "<div class=\"snippet\">";
      const char *end_tag = "</div>";
      size_t start_tag_length = strlen(start_tag);
      size_t end_tag_length = strlen(end_tag);
      for (unsigned int i = 0; i < n_results; ++i) {
        GRN_TEXT_PUT(ctx, result_buffer, start_tag, start_tag_length);

        /* The snippet is written straight into the buffer's tail, so the
         * space must really be there before grn_snip_get_result() runs. */
        grn_rc rc = grn_bulk_reserve(ctx, result_buffer, max_tagged_length);
        if (rc != GRN_SUCCESS) {
          my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM,
                          ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf);
          goto error;
        }

        unsigned int result_length;
        rc = grn_snip_get_result(ctx, snippet, i,
                                 GRN_BULK_CURR(result_buffer),
                                 &result_length);
        if (rc != GRN_SUCCESS) {
          my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM,
                          ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf);
          goto error;
        }
        /* Commit the bytes that grn_snip_get_result() just wrote. */
        grn_bulk_space(ctx, result_buffer, result_length);

        GRN_TEXT_PUT(ctx, result_buffer, end_tag, end_tag_length);
      }
    }

    if (!info->snippet) {
      grn_rc rc = grn_obj_close(ctx, snippet);
      /* Forget the pointer whatever the outcome, so that the error path
       * below never closes the same object a second time. */
      snippet = NULL;
      if (rc != GRN_SUCCESS) {
        my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM,
                        ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf);
        goto error;
      }
    }
  }

  *length = GRN_TEXT_LEN(result_buffer);
  DBUG_RETURN(GRN_TEXT_VALUE(result_buffer));

error:
  /* Release a snippet that was prepared only for this call. */
  if (!info->snippet && snippet) {
    grn_obj_close(ctx, snippet);
  }

  *is_null = 1;
  *error = 1;

  DBUG_RETURN(NULL);
}
Пример #20
0
static void
json_read(grn_ctx *ctx, grn_loader *loader, const char *str, unsigned int str_len)
{
  const char *const beg = str;
  char c;
  int len;
  const char *se = str + str_len;
  while (str < se) {
    c = *str;
    switch (loader->stat) {
    case GRN_LOADER_BEGIN :
      if ((len = grn_isspace(str, ctx->encoding))) {
        str += len;
        continue;
      }
      switch (c) {
      case '[' :
        JSON_READ_OPEN_BRACKET();
        break;
      case '{' :
        JSON_READ_OPEN_BRACE();
        break;
      default :
        ERR(GRN_INVALID_ARGUMENT,
            "JSON must start with '[' or '{': <%.*s>", str_len, beg);
        loader->stat = GRN_LOADER_END;
        break;
      }
      break;
    case GRN_LOADER_TOKEN :
      if ((len = grn_isspace(str, ctx->encoding))) {
        str += len;
        continue;
      }
      switch (c) {
      case '"' :
        loader->stat = GRN_LOADER_STRING;
        values_add(ctx, loader);
        str++;
        break;
      case '[' :
        JSON_READ_OPEN_BRACKET();
        break;
      case '{' :
        JSON_READ_OPEN_BRACE();
        break;
      case ':' :
        str++;
        break;
      case ',' :
        str++;
        break;
      case ']' :
        bracket_close(ctx, loader);
        loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
        if (ctx->rc == GRN_CANCEL) {
          loader->stat = GRN_LOADER_END;
        }
        str++;
        break;
      case '}' :
        brace_close(ctx, loader);
        loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
        if (ctx->rc == GRN_CANCEL) {
          loader->stat = GRN_LOADER_END;
        }
        str++;
        break;
      case '+' : case '-' : case '0' : case '1' : case '2' : case '3' :
      case '4' : case '5' : case '6' : case '7' : case '8' : case '9' :
        loader->stat = GRN_LOADER_NUMBER;
        values_add(ctx, loader);
        break;
      default :
        if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('_' == c)) {
          loader->stat = GRN_LOADER_SYMBOL;
          values_add(ctx, loader);
        } else {
          if ((len = grn_charlen(ctx, str, se))) {
            GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char('%c') at", c);
            GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg) + len, beg);
            GRN_LOG(ctx, GRN_LOG_ERROR, "%*s", (int)(str - beg) + 1, "^");
            str += len;
          } else {
            GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char(\\x%.2x) after", c);
            GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg), beg);
            str = se;
          }
        }
        break;
      }
      break;
    case GRN_LOADER_SYMBOL :
      if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') ||
          ('0' <= c && c <= '9') || ('_' == c)) {
        GRN_TEXT_PUTC(ctx, loader->last, c);
        str++;
      } else {
        char *v = GRN_TEXT_VALUE(loader->last);
        switch (*v) {
        case 'n' :
          if (GRN_TEXT_LEN(loader->last) == 4 && !memcmp(v, "null", 4)) {
            loader->last->header.domain = GRN_DB_VOID;
            GRN_BULK_REWIND(loader->last);
          }
          break;
        case 't' :
          if (GRN_TEXT_LEN(loader->last) == 4 && !memcmp(v, "true", 4)) {
            loader->last->header.domain = GRN_DB_BOOL;
            GRN_BOOL_SET(ctx, loader->last, GRN_TRUE);
          }
          break;
        case 'f' :
          if (GRN_TEXT_LEN(loader->last) == 5 && !memcmp(v, "false", 5)) {
            loader->last->header.domain = GRN_DB_BOOL;
            GRN_BOOL_SET(ctx, loader->last, GRN_FALSE);
          }
          break;
        default :
          break;
        }
        loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
      }
      break;
    case GRN_LOADER_NUMBER :
      switch (c) {
      case '+' : case '-' : case '.' : case 'e' : case 'E' :
      case '0' : case '1' : case '2' : case '3' : case '4' :
      case '5' : case '6' : case '7' : case '8' : case '9' :
        GRN_TEXT_PUTC(ctx, loader->last, c);
        str++;
        break;
      default :
        {
          const char *cur, *str = GRN_BULK_HEAD(loader->last);
          const char *str_end = GRN_BULK_CURR(loader->last);
          int64_t i = grn_atoll(str, str_end, &cur);
          if (cur == str_end) {
            loader->last->header.domain = GRN_DB_INT64;
            GRN_INT64_SET(ctx, loader->last, i);
          } else if (cur != str) {
            uint64_t i = grn_atoull(str, str_end, &cur);
            if (cur == str_end) {
              loader->last->header.domain = GRN_DB_UINT64;
              GRN_UINT64_SET(ctx, loader->last, i);
            } else if (cur != str) {
              double d;
              char *end;
              grn_obj buf;
              GRN_TEXT_INIT(&buf, 0);
              GRN_TEXT_PUT(ctx, &buf, str, GRN_BULK_VSIZE(loader->last));
              GRN_TEXT_PUTC(ctx, &buf, '\0');
              errno = 0;
              d = strtod(GRN_TEXT_VALUE(&buf), &end);
              if (!errno && end + 1 == GRN_BULK_CURR(&buf)) {
                loader->last->header.domain = GRN_DB_FLOAT;
                GRN_FLOAT_SET(ctx, loader->last, d);
              }
              GRN_OBJ_FIN(ctx, &buf);
            }
          }
        }
        loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
        break;
      }
      break;
    case GRN_LOADER_STRING :
      switch (c) {
      case '\\' :
        loader->stat = GRN_LOADER_STRING_ESC;
        str++;
        break;
      case '"' :
        str++;
        loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
        /*
        *(GRN_BULK_CURR(loader->last)) = '\0';
        GRN_LOG(ctx, GRN_LOG_ALERT, "read str(%s)", GRN_TEXT_VALUE(loader->last));
        */
        break;
      default :
        if ((len = grn_charlen(ctx, str, se))) {
          GRN_TEXT_PUT(ctx, loader->last, str, len);
          str += len;
        } else {
          GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char(\\x%.2x) after", c);
          GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg), beg);
          str = se;
        }
        break;
      }
      break;
    case GRN_LOADER_STRING_ESC :
      switch (c) {
      case 'b' :
        GRN_TEXT_PUTC(ctx, loader->last, '\b');
        loader->stat = GRN_LOADER_STRING;
        break;
      case 'f' :
        GRN_TEXT_PUTC(ctx, loader->last, '\f');
        loader->stat = GRN_LOADER_STRING;
        break;
      case 'n' :
        GRN_TEXT_PUTC(ctx, loader->last, '\n');
        loader->stat = GRN_LOADER_STRING;
        break;
      case 'r' :
        GRN_TEXT_PUTC(ctx, loader->last, '\r');
        loader->stat = GRN_LOADER_STRING;
        break;
      case 't' :
        GRN_TEXT_PUTC(ctx, loader->last, '\t');
        loader->stat = GRN_LOADER_STRING;
        break;
      case 'u' :
        loader->stat = GRN_LOADER_UNICODE0;
        break;
      default :
        GRN_TEXT_PUTC(ctx, loader->last, c);
        loader->stat = GRN_LOADER_STRING;
        break;
      }
      str++;
      break;
    case GRN_LOADER_UNICODE0 :
      switch (c) {
      case '0' : case '1' : case '2' : case '3' : case '4' :
      case '5' : case '6' : case '7' : case '8' : case '9' :
        loader->unichar = (c - '0') * 0x1000;
        break;
      case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' :
        loader->unichar = (c - 'a' + 10) * 0x1000;
        break;
      case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' :
        loader->unichar = (c - 'A' + 10) * 0x1000;
        break;
      default :
        ;// todo : error
      }
      loader->stat = GRN_LOADER_UNICODE1;
      str++;
      break;
    case GRN_LOADER_UNICODE1 :
      switch (c) {
      case '0' : case '1' : case '2' : case '3' : case '4' :
      case '5' : case '6' : case '7' : case '8' : case '9' :
        loader->unichar += (c - '0') * 0x100;
        break;
      case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' :
        loader->unichar += (c - 'a' + 10) * 0x100;
        break;
      case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' :
        loader->unichar += (c - 'A' + 10) * 0x100;
        break;
      default :
        ;// todo : error
      }
      loader->stat = GRN_LOADER_UNICODE2;
      str++;
      break;
    case GRN_LOADER_UNICODE2 :
      switch (c) {
      case '0' : case '1' : case '2' : case '3' : case '4' :
      case '5' : case '6' : case '7' : case '8' : case '9' :
        loader->unichar += (c - '0') * 0x10;
        break;
      case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' :
        loader->unichar += (c - 'a' + 10) * 0x10;
        break;
      case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' :
        loader->unichar += (c - 'A' + 10) * 0x10;
        break;
      default :
        ;// todo : error
      }
      loader->stat = GRN_LOADER_UNICODE3;
      str++;
      break;
    case GRN_LOADER_UNICODE3 :
      switch (c) {
      case '0' : case '1' : case '2' : case '3' : case '4' :
      case '5' : case '6' : case '7' : case '8' : case '9' :
        loader->unichar += (c - '0');
        break;
      case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' :
        loader->unichar += (c - 'a' + 10);
        break;
      case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' :
        loader->unichar += (c - 'A' + 10);
        break;
      default :
        ;// todo : error
      }
      {
        uint32_t u = loader->unichar;
        if (u >= 0xd800 && u <= 0xdbff) { /* High-surrogate code points */
          loader->unichar_hi = u;
          loader->stat = GRN_LOADER_STRING;
          str++;
          break;
        }
        if (u >= 0xdc00 && u <= 0xdfff) { /* Low-surrogate code points */
          u = 0x10000 + (loader->unichar_hi - 0xd800) * 0x400 + u - 0xdc00;
        }
        if (u < 0x80) {
          GRN_TEXT_PUTC(ctx, loader->last, u);
        } else {
          if (u < 0x800) {
            GRN_TEXT_PUTC(ctx, loader->last, (u >> 6) | 0xc0);
          } else {
            if (u < 0x10000) {
              GRN_TEXT_PUTC(ctx, loader->last, (u >> 12) | 0xe0);
            } else {
              GRN_TEXT_PUTC(ctx, loader->last, (u >> 18) | 0xf0);
              GRN_TEXT_PUTC(ctx, loader->last, ((u >> 12) & 0x3f) | 0x80);
            }
            GRN_TEXT_PUTC(ctx, loader->last, ((u >> 6) & 0x3f) | 0x80);
          }
          GRN_TEXT_PUTC(ctx, loader->last, (u & 0x3f) | 0x80);
        }
Пример #21
0
/*
 * Produces the next token of the regexp tokenizer.
 *
 * The tokenizer state is read from user_data->ptr; nargs and args are not
 * used. Every call pushes exactly one token into tokenizer->token with
 * grn_tokenizer_token_push() and returns NULL:
 *   - a begin mark or an end mark when the state asks for one,
 *   - an empty token flagged LAST|REACH_END when no character is left,
 *   - otherwise a token of up to ngram_unit (2) characters that starts at
 *     tokenizer->next.
 * tokenizer->next only advances past the first character of the emitted
 * token, so consecutive tokens overlap by one character.
 */
static grn_obj *
regexp_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  int char_len;
  grn_token_status status = 0;
  grn_regexp_tokenizer *tokenizer = user_data->ptr;
  unsigned int n_characters = 0;
  int ngram_unit = 2;
  grn_obj *buffer = &(tokenizer->buffer);
  const char *current = tokenizer->next;
  const char *end = tokenizer->end;
  grn_tokenize_mode mode = tokenizer->query->tokenize_mode;
  grn_bool escaping = GRN_FALSE;

  GRN_BULK_REWIND(buffer);

  /* Phase 1: emit a pending begin/end mark instead of a text token. */
  if (mode == GRN_TOKEN_GET) {
    /* GET mode: the begin mark is emitted once, only if get.have_begin. */
    if (tokenizer->get.have_begin) {
      grn_tokenizer_token_push(ctx,
                               &(tokenizer->token),
                               GRN_TOKENIZER_BEGIN_MARK_UTF8,
                               GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN,
                               status);
      tokenizer->get.have_begin = GRN_FALSE;
      return NULL;
    }

    /* GET mode: the end mark is emitted only if get.have_end is set. */
    if (tokenizer->is_end && tokenizer->get.have_end) {
      status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
      grn_tokenizer_token_push(ctx,
                               &(tokenizer->token),
                               GRN_TOKENIZER_END_MARK_UTF8,
                               GRN_TOKENIZER_END_MARK_UTF8_LEN,
                               status);
      return NULL;
    }
  } else {
    /* Other modes: the begin mark is emitted while is_begin is set. */
    if (tokenizer->is_begin) {
      grn_tokenizer_token_push(ctx,
                               &(tokenizer->token),
                               GRN_TOKENIZER_BEGIN_MARK_UTF8,
                               GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN,
                               status);
      tokenizer->is_begin = GRN_FALSE;
      return NULL;
    }

    /* Other modes: the end mark is emitted as soon as is_end is set. */
    if (tokenizer->is_end) {
      status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
      grn_tokenizer_token_push(ctx,
                               &(tokenizer->token),
                               GRN_TOKENIZER_END_MARK_UTF8,
                               GRN_TOKENIZER_END_MARK_UTF8_LEN,
                               status);
      return NULL;
    }
  }

  /* No character available at the current position: emit an empty last
   * token. */
  char_len = grn_charlen_(ctx, current, end, tokenizer->query->encoding);
  if (char_len == 0) {
    status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
    grn_tokenizer_token_push(ctx, &(tokenizer->token), "", 0, status);
    return NULL;
  }

  /* Phase 2: collect up to ngram_unit characters into buffer. */
  while (GRN_TRUE) {
    if (!escaping && mode == GRN_TOKEN_GET &&
        char_len == 1 && current[0] == '\\') {
      /* GET mode only: a one-byte backslash that is not itself escaped is
       * dropped, and the character after it is taken literally. */
      current += char_len;
      escaping = GRN_TRUE;
    } else {
      n_characters++;
      GRN_TEXT_PUT(ctx, buffer, current, char_len);
      current += char_len;
      escaping = GRN_FALSE;
      /* The next call restarts right after the first collected character. */
      if (n_characters == 1) {
        tokenizer->next = current;
      }
      if (n_characters == ngram_unit) {
        break;
      }
    }

    char_len = grn_charlen_(ctx, (const char *)current, (const char *)end,
                            tokenizer->query->encoding);
    if (char_len == 0) {
      break;
    }
  }

  /* Phase 3: compute the status flags for the collected token. */
  /* is_overlapping was recorded by the previous call. */
  if (tokenizer->is_overlapping) {
    status |= GRN_TOKEN_OVERLAP;
  }
  /* Fewer characters than ngram_unit were collected. */
  if (n_characters < ngram_unit) {
    status |= GRN_TOKEN_UNMATURED;
  }
  tokenizer->is_overlapping = (n_characters > 1);

  if (mode == GRN_TOKEN_GET) {
    /* Fewer than ngram_unit bytes remain after the next start position. */
    if ((end - tokenizer->next) < ngram_unit) {
      if (tokenizer->get.have_end) {
        /* An end mark will follow, so is_end is only set once the input is
         * fully consumed. */
        if (tokenizer->next == end) {
          tokenizer->is_end = GRN_TRUE;
        }
        /* A short token is kept as a prefix only when it is the first
         * token; otherwise it is flagged to be skipped. */
        if (status & GRN_TOKEN_UNMATURED) {
          if (tokenizer->is_first_token) {
            status |= GRN_TOKEN_FORCE_PREFIX;
          } else {
            status |= GRN_TOKEN_SKIP;
          }
        }
      } else {
        /* No end mark will follow: this is the last token. */
        tokenizer->is_end = GRN_TRUE;
        status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
        if (status & GRN_TOKEN_UNMATURED) {
          status |= GRN_TOKEN_FORCE_PREFIX;
        }
      }
    } else {
      /* Away from the tail: after one unflagged token, the following
       * ngram_unit - 1 tokens are flagged GRN_TOKEN_SKIP. */
      if (tokenizer->get.n_skip_tokens > 0) {
        tokenizer->get.n_skip_tokens--;
        status |= GRN_TOKEN_SKIP;
      } else {
        tokenizer->get.n_skip_tokens = ngram_unit - 1;
      }
    }
  } else {
    /* Other modes: the next call emits the end mark once the input is
     * consumed. */
    if (tokenizer->next == end) {
      tokenizer->is_end = GRN_TRUE;
    }
  }

  grn_tokenizer_token_push(ctx,
                           &(tokenizer->token),
                           GRN_TEXT_VALUE(buffer),
                           GRN_TEXT_LEN(buffer),
                           status);
  tokenizer->is_first_token = GRN_FALSE;

  return NULL;
}
Пример #22
0
/*
 * Writes the value that `obj` yields for record `id` into `outbuf`, using
 * the format selected by `output_type`.
 *
 * - When `obj` is not an accessor, a temporary buffer is initialized
 *   according to `obj->header.type`, filled with grn_obj_get_value() and
 *   handed to grn_output_obj().
 * - When `obj` is an accessor, each link of the chain is evaluated in turn.
 *   While a following link exists, the first grn_id found in the temporary
 *   buffer (or GRN_ID_NIL when the buffer holds fewer bytes than a grn_id)
 *   becomes the record id for that following link.
 * - A vector column met inside an accessor chain is emitted by recursive
 *   calls and skips the final grn_output_obj().
 *
 * The temporary buffer is closed on every path before returning.
 */
static void
grn_text_atoj(grn_ctx *ctx, grn_obj *outbuf, grn_content_type output_type,
              grn_obj *obj, grn_id id)
{
  int value_size;
  grn_obj buf;

  if (obj->header.type != GRN_ACCESSOR) {
    /* Direct object: choose the buffer layout from the object type. */
    switch (obj->header.type) {
    case GRN_COLUMN_FIX_SIZE :
      GRN_VALUE_FIX_SIZE_INIT(&buf, 0, DB_OBJ(obj)->range);
      break;
    case GRN_COLUMN_VAR_SIZE :
      if ((obj->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) != GRN_OBJ_COLUMN_VECTOR) {
        GRN_VALUE_VAR_SIZE_INIT(&buf, 0, DB_OBJ(obj)->range);
      } else {
        /* Vector column: the range object's flags pick the element layout. */
        grn_obj *element_range = grn_ctx_at(ctx, DB_OBJ(obj)->range);
        if (element_range->header.flags & GRN_OBJ_KEY_VAR_SIZE) {
          GRN_VALUE_VAR_SIZE_INIT(&buf, GRN_OBJ_VECTOR, DB_OBJ(obj)->range);
        } else {
          GRN_VALUE_FIX_SIZE_INIT(&buf, GRN_OBJ_VECTOR, DB_OBJ(obj)->range);
        }
      }
      break;
    case GRN_COLUMN_INDEX :
      GRN_UINT32_INIT(&buf, 0);
      break;
    default:
      GRN_TEXT_INIT(&buf, 0);
      break;
    }
    grn_obj_get_value(ctx, obj, id, &buf);
  } else {
    grn_accessor *a;

    GRN_TEXT_INIT(&buf, 0);
    /* Walk the accessor chain; the loop increment moves to the next link. */
    for (a = (grn_accessor *)obj; ; a = a->next) {
      /* Default domain comes from the whole accessor; cases may override. */
      buf.header.domain = grn_obj_get_range(ctx, obj);
      GRN_BULK_REWIND(&buf);
      switch (a->action) {
      case GRN_ACCESSOR_GET_ID :
        GRN_UINT32_PUT(ctx, &buf, id);
        buf.header.domain = GRN_DB_UINT32;
        break;
      case GRN_ACCESSOR_GET_KEY :
        grn_table_get_key2(ctx, a->obj, id, &buf);
        buf.header.domain = DB_OBJ(a->obj)->header.domain;
        break;
      case GRN_ACCESSOR_GET_VALUE :
        grn_obj_get_value(ctx, a->obj, id, &buf);
        buf.header.domain = GRN_DB_INT32; /* fix me */
        break;
      case GRN_ACCESSOR_GET_SCORE :
        /*
         * NOTE(review): both the object value and the score are written
         * into buf here, unlike the NSUBRECS case below; confirm that the
         * first write is intentional.
         */
        grn_obj_get_value(ctx, a->obj, id, &buf);
        {
          grn_rset_recinfo *recinfo =
            (grn_rset_recinfo *)grn_obj_get_value_(ctx, a->obj, id, &value_size);
          GRN_INT32_PUT(ctx, &buf, recinfo->score);
        }
        buf.header.domain = GRN_DB_INT32;
        break;
      case GRN_ACCESSOR_GET_NSUBRECS :
        {
          grn_rset_recinfo *recinfo =
            (grn_rset_recinfo *)grn_obj_get_value_(ctx, a->obj, id, &value_size);
          GRN_INT32_PUT(ctx, &buf, recinfo->n_subrecs);
        }
        buf.header.domain = GRN_DB_INT32;
        break;
      case GRN_ACCESSOR_GET_COLUMN_VALUE :
        if ((a->obj->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) != GRN_OBJ_COLUMN_VECTOR) {
          grn_obj_get_value(ctx, a->obj, id, &buf);
          break;
        }
        if (!a->next) {
          /* Last link is a vector column: output it as a plain object. */
          grn_text_atoj(ctx, outbuf, output_type, a->obj, id);
          goto exit;
        }
        {
          /*
           * Vector column followed by more links: treat the fetched value
           * as an array of ids and resolve the rest of the chain for each.
           */
          grn_id *element;
          int n_elements;
          grn_obj_get_value(ctx, a->obj, id, &buf);
          element = (grn_id *)GRN_BULK_HEAD(&buf);
          n_elements = GRN_BULK_VSIZE(&buf) / sizeof(grn_id);
          grn_output_array_open(ctx, outbuf, output_type, "COLUMN", n_elements);
          while (n_elements--) {
            grn_text_atoj(ctx, outbuf, output_type, (grn_obj *)a->next, *element);
            element++;
          }
          grn_output_array_close(ctx, outbuf, output_type);
        }
        goto exit;
      case GRN_ACCESSOR_GET_DB_OBJ :
        /* todo */
        break;
      case GRN_ACCESSOR_LOOKUP :
        /* todo */
        break;
      case GRN_ACCESSOR_FUNCALL :
        /* todo */
        break;
      }
      if (!a->next) {
        break;
      }
      /* The value just fetched supplies the record id for the next link. */
      id = (GRN_BULK_VSIZE(&buf) >= sizeof(grn_id))
        ? *((grn_id *)GRN_BULK_HEAD(&buf))
        : GRN_ID_NIL;
    }
  }
  grn_output_obj(ctx, outbuf, output_type, &buf, NULL);
exit :
  grn_obj_close(ctx, &buf);
}