Example #1
0
static grn_bool
chunked_tokenize_utf8_chunk(grn_ctx *ctx,
                            grn_mecab_tokenizer *tokenizer,
                            const char *chunk,
                            unsigned int chunk_bytes)
{
  const char *tokenized_chunk;
  size_t tokenized_chunk_length;

  tokenized_chunk = mecab_sparse_tostr2(tokenizer->mecab, chunk, chunk_bytes);
  if (!tokenized_chunk) {
    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
                     "[tokenizer][mecab][chunk] "
                     "mecab_sparse_tostr2() failed len=%d err=%s",
                     chunk_bytes,
                     mecab_strerror(tokenizer->mecab));
    return GRN_FALSE;
  }

  if (GRN_TEXT_LEN(&(tokenizer->buf)) > 0) {
    GRN_TEXT_PUTS(ctx, &(tokenizer->buf), " ");
  }

  tokenized_chunk_length = strlen(tokenized_chunk);
  if (tokenized_chunk_length >= 1 &&
      isspace(tokenized_chunk[tokenized_chunk_length - 1])) {
    GRN_TEXT_PUT(ctx, &(tokenizer->buf),
                 tokenized_chunk, tokenized_chunk_length - 1);
  } else {
    GRN_TEXT_PUT(ctx, &(tokenizer->buf),
                 tokenized_chunk, tokenized_chunk_length);
  }

  return GRN_TRUE;
}
Example #2
0
static grn_rc
grn_pat_tag_keys(grn_ctx *ctx, grn_obj *keywords,
                 const char *string, unsigned int string_length,
                 const char **open_tags, unsigned int *open_tag_lengths,
                 const char **close_tags, unsigned int *close_tag_lengths,
                 unsigned int n_tags,
                 grn_obj *highlighted,
                 grn_bool use_html_escape)
{
  while (string_length > 0) {
#define MAX_N_HITS 16
    grn_pat_scan_hit hits[MAX_N_HITS];
    const char *rest;
    unsigned int i, n_hits;
    unsigned int previous = 0;
    size_t chunk_length;

    n_hits = grn_pat_scan(ctx, (grn_pat *)keywords,
                          string, string_length,
                          hits, MAX_N_HITS, &rest);
    for (i = 0; i < n_hits; i++) {
      unsigned int nth_tag;
      if (hits[i].offset - previous > 0) {
        grn_pat_tag_keys_put_original_text(ctx,
                                           highlighted,
                                           string + previous,
                                           hits[i].offset - previous,
                                           use_html_escape);
      }
      nth_tag = ((hits[i].id - 1) % n_tags);
      GRN_TEXT_PUT(ctx, highlighted,
                   open_tags[nth_tag], open_tag_lengths[nth_tag]);
      grn_pat_tag_keys_put_original_text(ctx,
                                         highlighted,
                                         string + hits[i].offset,
                                         hits[i].length,
                                         use_html_escape);
      GRN_TEXT_PUT(ctx, highlighted,
                   close_tags[nth_tag], close_tag_lengths[nth_tag]);
      previous = hits[i].offset + hits[i].length;
    }

    chunk_length = rest - string;
    if (chunk_length - previous > 0) {
      grn_pat_tag_keys_put_original_text(ctx,
                                         highlighted,
                                         string + previous,
                                         string_length - previous,
                                         use_html_escape);
    }
    string_length -= chunk_length;
    string = rest;
#undef MAX_N_HITS
  }

  return GRN_SUCCESS;
}
grn_rc
grn_string_inspect(grn_ctx *ctx, grn_obj *buffer, grn_obj *string)
{
  grn_string *string_ = (grn_string *)string;

  GRN_TEXT_PUTS(ctx, buffer, "#<string:");

  GRN_TEXT_PUTS(ctx, buffer, " original:<");
  GRN_TEXT_PUT(ctx, buffer,
               string_->original,
               string_->original_length_in_bytes);
  GRN_TEXT_PUTS(ctx, buffer, ">");
  GRN_TEXT_PUTS(ctx, buffer, "(");
  grn_text_itoa(ctx, buffer, string_->original_length_in_bytes);
  GRN_TEXT_PUTS(ctx, buffer, ")");

  GRN_TEXT_PUTS(ctx, buffer, " normalized:<");
  GRN_TEXT_PUT(ctx, buffer,
               string_->normalized,
               string_->normalized_length_in_bytes);
  GRN_TEXT_PUTS(ctx, buffer, ">");
  GRN_TEXT_PUTS(ctx, buffer, "(");
  grn_text_itoa(ctx, buffer, string_->normalized_length_in_bytes);
  GRN_TEXT_PUTS(ctx, buffer, ")");

  GRN_TEXT_PUTS(ctx, buffer, " n_characters:");
  grn_text_itoa(ctx, buffer, string_->n_characters);

  GRN_TEXT_PUTS(ctx, buffer, " encoding:");
  grn_inspect_encoding(ctx, buffer, string_->encoding);

  GRN_TEXT_PUTS(ctx, buffer, " flags:");
  if (string_->flags & GRN_STRING_REMOVE_BLANK) {
  GRN_TEXT_PUTS(ctx, buffer, "REMOVE_BLANK|");
  }
  if (string_->flags & GRN_STRING_WITH_TYPES) {
    GRN_TEXT_PUTS(ctx, buffer, "WITH_TYPES|");
  }
  if (string_->flags & GRN_STRING_WITH_CHECKS) {
    GRN_TEXT_PUTS(ctx, buffer, "WITH_CHECKS|");
  }
  if (string_->flags & GRN_STRING_REMOVE_TOKENIZED_DELIMITER) {
    GRN_TEXT_PUTS(ctx, buffer, "REMOVE_TOKENIZED_DELIMITER|");
  }
  if (GRN_TEXT_VALUE(buffer)[GRN_TEXT_LEN(buffer) - 1] == '|') {
    grn_bulk_truncate(ctx, buffer, GRN_TEXT_LEN(buffer) - 1);
  }

  GRN_TEXT_PUTS(ctx, buffer, ">");

  return GRN_SUCCESS;
}
Example #4
0
void
grn_output_array_close(grn_ctx *ctx, grn_obj *outbuf, grn_content_type output_type)
{
  switch (output_type) {
  case GRN_CONTENT_JSON:
    GRN_TEXT_PUTC(ctx, outbuf, ']');
    break;
  case GRN_CONTENT_TSV:
    if (DEPTH > 3) {
      if (CURR_LEVEL >= 2) { GRN_TEXT_PUTC(ctx, outbuf, '\t'); }
      GRN_TEXT_PUTC(ctx, outbuf, ']');
    }
    break;
  case GRN_CONTENT_XML:
    {
      const char *name;
      unsigned int name_len = grn_vector_pop_element(ctx, &ctx->impl->names, &name, NULL, NULL);
      GRN_TEXT_PUTS(ctx, outbuf, "</");
      GRN_TEXT_PUT(ctx, outbuf, name, name_len);
      GRN_TEXT_PUTC(ctx, outbuf, '>');
    }
    break;
  case GRN_CONTENT_MSGPACK :
    // do nothing
    break;
  case GRN_CONTENT_NONE:
    break;
  }
  DECR_DEPTH;
  INCR_LENGTH;
}
static ngx_int_t
ngx_http_groonga_handler_validate_post_command(ngx_http_request_t *r,
                                               ngx_str_t *command_path,
                                               ngx_http_groonga_handler_data_t *data)
{
  grn_ctx *context;
  ngx_str_t command;

  command.data = command_path->data;
  if (r->args.len == 0) {
    command.len = command_path->len;
  } else {
    command.len = command_path->len - r->args.len - strlen("?");
  }
  if (ngx_str_equal_c_string(&command, "load")) {
    return NGX_OK;
  }

  context = &(data->context);
  ngx_http_groonga_handler_set_content_type(r, "text/plain");
  GRN_TEXT_PUTS(context, &(data->body), "command for POST must be <load>: <");
  GRN_TEXT_PUT(context, &(data->body), command.data, command.len);
  GRN_TEXT_PUTS(context, &(data->body), ">");

  return NGX_HTTP_BAD_REQUEST;
}
Example #6
0
File: util.c Project: XLPE/groonga
grn_rc
grn_expr_inspect(grn_ctx *ctx, grn_obj *buffer, grn_obj *expr)
{
  grn_expr *e = (grn_expr *)expr;

  GRN_TEXT_PUTS(ctx, buffer, "#<expr\n");
  {
    int i = 0;
    grn_obj *value;
    const char *name;
    uint32_t name_len;
    unsigned int n_vars;
    grn_hash *vars = grn_expr_get_vars(ctx, expr, &n_vars);
    GRN_TEXT_PUTS(ctx, buffer, "  vars:{");
    GRN_HASH_EACH(ctx, vars, id, &name, &name_len, &value, {
      if (i++) {
        GRN_TEXT_PUTC(ctx, buffer, ',');
      }
      GRN_TEXT_PUTS(ctx, buffer, "\n    ");
      GRN_TEXT_PUT(ctx, buffer, name, name_len);
      GRN_TEXT_PUTC(ctx, buffer, ':');
      grn_inspect_indented(ctx, buffer, value, "    ");
    });
    GRN_TEXT_PUTS(ctx, buffer, "\n  },");
  }
Example #7
0
/*
 * Document-method: column_value
 *
 * call-seq:
 *   view.column_value(id, name) -> 値
 *
 * _view_の_id_に対応するカラム_name_の値を返す。
 */
static VALUE
rb_grn_view_get_column_value (VALUE self, VALUE rb_id, VALUE rb_name)
{
    VALUE rb_value = Qnil;
#ifdef WIN32
    rb_raise(rb_eNotImpError,
             "grn_obj_get_value_o() isn't available on Windows.");
#else
    RbGrnTable *rb_view;
    grn_ctx *context = NULL;
    grn_obj *view, *value, *accessor;
    grn_obj id;

    rb_view = SELF(self);
    rb_grn_table_deconstruct(rb_view, &view, &context,
                             NULL, NULL,
                             &value, NULL, NULL,
                             NULL);
    GRN_BULK_REWIND(value);
    GRN_TEXT_INIT(&id, 0);
    GRN_TEXT_PUT(context, &id, RSTRING_PTR(rb_id), RSTRING_LEN(rb_id));
    accessor = grn_obj_column(context, view,
                              RSTRING_PTR(rb_name), RSTRING_LEN(rb_name));
    grn_obj_get_value_o(context, accessor, &id, value);
    grn_obj_unlink(context, accessor);
    rb_value = GRNOBJ2RVAL(Qnil, context, value, self);
    GRN_OBJ_FIN(context, &id);
#endif

    return rb_value;
}
Example #8
0
static grn_obj *
snippet_exec(grn_ctx *ctx, grn_obj *snip, grn_obj *text,
             grn_user_data *user_data,
             const char *prefix, int prefix_length,
             const char *suffix, int suffix_length)
{
  grn_rc rc;
  unsigned int i, n_results, max_tagged_length;
  grn_obj snippet_buffer;
  grn_obj *snippets;

  if (GRN_TEXT_LEN(text) == 0) {
    return NULL;
  }

  rc = grn_snip_exec(ctx, snip,
                     GRN_TEXT_VALUE(text), GRN_TEXT_LEN(text),
                     &n_results, &max_tagged_length);
  if (rc != GRN_SUCCESS) {
    return NULL;
  }

  if (n_results == 0) {
    return grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_SHORT_TEXT, GRN_OBJ_VECTOR);
  if (!snippets) {
    return NULL;
  }

  GRN_TEXT_INIT(&snippet_buffer, 0);
  grn_bulk_space(ctx, &snippet_buffer,
                 prefix_length + max_tagged_length + suffix_length);
  for (i = 0; i < n_results; i++) {
    unsigned int snippet_length;

    GRN_BULK_REWIND(&snippet_buffer);
    if (prefix_length) {
      GRN_TEXT_PUT(ctx, &snippet_buffer, prefix, prefix_length);
    }
    rc = grn_snip_get_result(ctx, snip, i,
                             GRN_TEXT_VALUE(&snippet_buffer) + prefix_length,
                             &snippet_length);
    if (rc == GRN_SUCCESS) {
      grn_strncat(GRN_TEXT_VALUE(&snippet_buffer),
                  GRN_BULK_WSIZE(&snippet_buffer),
                  suffix,
                  suffix_length);
      grn_vector_add_element(ctx, snippets,
                             GRN_TEXT_VALUE(&snippet_buffer),
                             prefix_length + snippet_length + suffix_length,
                             0, GRN_DB_SHORT_TEXT);
    }
  }
  GRN_OBJ_FIN(ctx, &snippet_buffer);

  return snippets;
}
Example #9
0
static grn_rc
grn_proc_inspect(grn_ctx *ctx, grn_obj *buf, grn_obj *obj)
{
  grn_proc *proc = (grn_proc *)obj;
  uint32_t i;

  GRN_TEXT_PUTS(ctx, buf, "#<proc:");
  switch (proc->type) {
  case GRN_PROC_INVALID :
    GRN_TEXT_PUTS(ctx, buf, "invalid");
    GRN_TEXT_PUTS(ctx, buf, ">");
    return GRN_SUCCESS;
    break;
  case GRN_PROC_TOKENIZER :
    GRN_TEXT_PUTS(ctx, buf, "tokenizer");
    break;
  case GRN_PROC_COMMAND :
    GRN_TEXT_PUTS(ctx, buf, "command");
    break;
  case GRN_PROC_FUNCTION :
    GRN_TEXT_PUTS(ctx, buf, "function");
    break;
  case GRN_PROC_HOOK :
    GRN_TEXT_PUTS(ctx, buf, "hook");
    break;
  case GRN_PROC_NORMALIZER :
    GRN_TEXT_PUTS(ctx, buf, "normalizer");
    break;
  case GRN_PROC_TOKEN_FILTER :
    GRN_TEXT_PUTS(ctx, buf, "token-filter");
    break;
  case GRN_PROC_SCORER :
    GRN_TEXT_PUTS(ctx, buf, "scorer");
    break;
  case GRN_PROC_WINDOW_FUNCTION :
    GRN_TEXT_PUTS(ctx, buf, "window-function");
    break;
  }
  GRN_TEXT_PUTS(ctx, buf, " ");

  grn_inspect_name(ctx, buf, obj);
  GRN_TEXT_PUTS(ctx, buf, " ");

  GRN_TEXT_PUTS(ctx, buf, "arguments:[");
  for (i = 0; i < proc->nvars; i++) {
    grn_expr_var *var = proc->vars + i;
    if (i != 0) {
      GRN_TEXT_PUTS(ctx, buf, ", ");
    }
    GRN_TEXT_PUT(ctx, buf, var->name, var->name_size);
  }
  GRN_TEXT_PUTS(ctx, buf, "]");

  GRN_TEXT_PUTS(ctx, buf, ">");

  return GRN_SUCCESS;
}
Example #10
0
static void
ngx_http_groonga_context_receive_handler_typed(grn_ctx *context,
                                               int flags,
                                               ngx_http_groonga_handler_data_t *data)
{
  char *result = NULL;
  unsigned int result_size = 0;
  int recv_flags;

  if (!(flags & GRN_CTX_TAIL)) {
    return;
  }

  grn_ctx_recv(context, &result, &result_size, &recv_flags);

#ifdef NGX_GRN_SUPPORT_STOP_BY_COMMAND
  if (recv_flags == GRN_CTX_QUIT) {
    ngx_int_t ngx_rc;
    ngx_int_t ngx_pid;

    if (ngx_process == NGX_PROCESS_SINGLE) {
      ngx_pid = getpid();
    } else {
      ngx_pid = getppid();
    }

    ngx_rc = ngx_os_signal_process((ngx_cycle_t *)ngx_cycle,
                                   "quit",
                                   ngx_pid);
    if (ngx_rc == NGX_OK) {
      context->stat &= ~GRN_CTX_QUIT;
      grn_ctx_recv(context, &result, &result_size, &recv_flags);
      context->stat |= GRN_CTX_QUIT;
    } else {
      context->rc = GRN_OPERATION_NOT_PERMITTED;
      GRN_TEXT_PUTS(context, &(data->typed.body), "false");
      context->stat &= ~GRN_CTX_QUIT;
    }
  }
#endif

  if (result_size > 0 ||
      GRN_TEXT_LEN(&(data->typed.body)) > 0 ||
      context->rc != GRN_SUCCESS) {
    if (result_size > 0) {
      GRN_TEXT_PUT(context, &(data->typed.body), result, result_size);
    }

    grn_output_envelope(context,
                        context->rc,
                        &(data->typed.head),
                        &(data->typed.body),
                        &(data->typed.foot),
                        NULL,
                        0);
  }
}
Example #11
0
static void
grn_pat_tag_keys_put_original_text(grn_ctx *ctx, grn_obj *output,
                                   const char *text, unsigned int length,
                                   grn_bool use_html_escape)
{
  if (use_html_escape) {
    grn_text_escape_xml(ctx, output, text, length);
  } else {
    GRN_TEXT_PUT(ctx, output, text, length);
  }
}
Example #12
0
  void test_mrn_hash_get()
  {
    const char *key = "mroonga";
    const char *value = "A storage engine based on groonga.";
    grn_obj *result;

    GRN_TEXT_SETS(ctx, &buffer, value);
    GRN_TEXT_PUT(ctx, &buffer, "\0", 1);

    mrn_hash_put(ctx, hash, key, &buffer);
    cut_assert_equal_int(0, mrn_hash_get(ctx, hash, key, &result));
    cut_assert_equal_string(value, GRN_TEXT_VALUE(&buffer));
  }
Example #13
0
  void ConditionConverter::append_field_value(const Item_field *field_item,
                                              grn_obj *expression) {
    MRN_DBUG_ENTER_METHOD();

    GRN_BULK_REWIND(&column_name_);
    GRN_TEXT_PUT(ctx_, &column_name_,
                 MRN_ITEM_FIELD_GET_NAME(field_item),
                 MRN_ITEM_FIELD_GET_NAME_LENGTH(field_item));
    grn_expr_append_const(ctx_, expression, &column_name_,
                          GRN_OP_PUSH, 1);
    grn_expr_append_op(ctx_, expression, GRN_OP_GET_VALUE, 1);

    DBUG_VOID_RETURN;
  }
Example #14
0
static grn_obj *
proc_define_selector(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
    uint32_t nvars;
    grn_expr_var *vars;
    grn_obj *outbuf = args[0];
    grn_proc_get_info(ctx, user_data, &vars, &nvars, NULL);
    if (grn_proc_create(ctx,
                        GRN_TEXT_VALUE(&vars[0].value),
                        GRN_TEXT_LEN(&vars[0].value),
                        NULL, proc_select, NULL, NULL, nvars - 1, vars + 1)) {
        GRN_TEXT_PUT(ctx, outbuf, GRN_TEXT_VALUE(&vars[0].value), GRN_TEXT_LEN(&vars[0].value));
    }
    return outbuf;
}
Example #15
0
static ngx_int_t
ngx_http_groonga_handler_process_command_path(ngx_http_request_t *r,
                                              ngx_str_t *command_path,
                                              ngx_http_groonga_handler_data_t *data)
{
  grn_obj uri;

  GRN_TEXT_INIT(&uri, 0);
  GRN_TEXT_PUTS(context, &uri, "/d/");
  GRN_TEXT_PUT(context, &uri, command_path->data, command_path->len);
  grn_ctx_send(context, GRN_TEXT_VALUE(&uri), GRN_TEXT_LEN(&uri),
               GRN_NO_FLAGS);
  ngx_http_groonga_context_log_error(r->connection->log);
  GRN_OBJ_FIN(context, &uri);

  return NGX_OK;
}
Example #16
0
static void
load_from_learner(msgpack_object *o, grn_ctx *ctx, grn_obj *cmd_buf)
{
  if (o->type == MSGPACK_OBJECT_MAP && o->via.map.size) {
    msgpack_object_kv *kv;
    kv = &(o->via.map.ptr[0]);
    if (kv->key.type == MSGPACK_OBJECT_RAW && kv->key.via.raw.size == 6 &&
        !memcmp(kv->key.via.raw.ptr, CONST_STR_LEN("target"))) {
      if (kv->val.type == MSGPACK_OBJECT_RAW) {
        int i;
        GRN_BULK_REWIND(cmd_buf);
        GRN_TEXT_PUTS(ctx, cmd_buf, "load --table ");
        GRN_TEXT_PUT(ctx, cmd_buf, kv->val.via.raw.ptr, kv->val.via.raw.size);
        grn_ctx_send(ctx, GRN_TEXT_VALUE(cmd_buf), GRN_TEXT_LEN(cmd_buf), GRN_CTX_MORE);
        grn_ctx_send(ctx, CONST_STR_LEN("["), GRN_CTX_MORE);
        if (kv->val.via.raw.size > 5) {
          if (!memcmp(kv->val.via.raw.ptr, CONST_STR_LEN("item_")) ||
              !memcmp(kv->val.via.raw.ptr, CONST_STR_LEN("pair_"))) {
            char delim = '{';
            GRN_BULK_REWIND(cmd_buf);
            for (i = 1; i < o->via.map.size; i++) {
              GRN_TEXT_PUTC(ctx, cmd_buf, delim);
              kv = &(o->via.map.ptr[i]);
              msgpack2json(&(kv->key), ctx, cmd_buf);
              GRN_TEXT_PUTC(ctx, cmd_buf, ':');
              msgpack2json(&(kv->val), ctx, cmd_buf);
              delim = ',';
            }
            GRN_TEXT_PUTC(ctx, cmd_buf, '}');
            /* printf("msg: %.*s\n", GRN_TEXT_LEN(cmd_buf), GRN_TEXT_VALUE(cmd_buf)); */
            grn_ctx_send(ctx, GRN_TEXT_VALUE(cmd_buf), GRN_TEXT_LEN(cmd_buf), GRN_CTX_MORE);
          }
        }
        grn_ctx_send(ctx, CONST_STR_LEN("]"), 0);
        {
          char *res;
          int flags;
          unsigned int res_len;
          grn_ctx_recv(ctx, &res, &res_len, &flags);
        }
      }
    }
  }
}
Example #17
0
static grn_encoding
detect_coding_part(grn_ctx *ctx, const char *line, size_t line_length)
{
  grn_encoding encoding = GRN_ENC_NONE;
  grn_obj null_terminated_line_buffer;
  const char *c_line;
  const char *coding_part_keyword = "coding: ";
  const char *coding_part;
  const char *encoding_name;

  GRN_TEXT_INIT(&null_terminated_line_buffer, 0);
  GRN_TEXT_PUT(ctx, &null_terminated_line_buffer, line, line_length);
  GRN_TEXT_PUTC(ctx, &null_terminated_line_buffer, '\0');

  c_line = GRN_TEXT_VALUE(&null_terminated_line_buffer);
  coding_part = strstr(c_line, coding_part_keyword);
  if (coding_part) {
    encoding_name = coding_part + strlen(coding_part_keyword);
    if (grn_strncasecmp(encoding_name, "utf-8", strlen("utf-8")) == 0 ||
        grn_strncasecmp(encoding_name, "utf8", strlen("utf8")) == 0) {
      encoding = GRN_ENC_UTF8;
    } else if (grn_strncasecmp(encoding_name, "sjis", strlen("sjis")) == 0 ||
               grn_strncasecmp(encoding_name, "Shift_JIS", strlen("Shift_JIS")) == 0) {
      encoding = GRN_ENC_SJIS;
    } else if (grn_strncasecmp(encoding_name, "EUC-JP", strlen("EUC-JP")) == 0 ||
               grn_strncasecmp(encoding_name, "euc_jp", strlen("euc_jp")) == 0) {
      encoding = GRN_ENC_EUC_JP;
    } else if (grn_strncasecmp(encoding_name, "latin1", strlen("latin1")) == 0) {
      encoding = GRN_ENC_LATIN1;
    } else if (grn_strncasecmp(encoding_name, "KOI8-R", strlen("KOI8-R")) == 0 ||
               grn_strncasecmp(encoding_name, "koi8r", strlen("koi8r")) == 0) {
      encoding = GRN_ENC_KOI8R;
    }
  } else {
    encoding = ctx->encoding;
  }
  GRN_OBJ_FIN(ctx, &null_terminated_line_buffer);

  return encoding;
}
static void
send_command(grn_ctx *ctx, grn_obj *buffer, const char *command,
             const char *dataset_name)
{
  const char *p = command;
  const char *dataset_place_holder = "${DATASET}";
  char *dataset_place_holder_position;

  if (ctx->rc != GRN_SUCCESS) {
    return;
  }

  GRN_BULK_REWIND(buffer);
  while ((dataset_place_holder_position = strstr(p, dataset_place_holder))) {
    GRN_TEXT_PUT(ctx, buffer, p, dataset_place_holder_position - p);
    GRN_TEXT_PUTS(ctx, buffer, dataset_name);
    p = dataset_place_holder_position + strlen(dataset_place_holder);
  }
  GRN_TEXT_PUTS(ctx, buffer, p);
  printf("> %.*s\n", (int)GRN_TEXT_LEN(buffer), GRN_TEXT_VALUE(buffer));
  grn_ctx_send(ctx, GRN_TEXT_VALUE(buffer), GRN_TEXT_LEN(buffer), 0);
  output(ctx);
}
Example #19
0
static void escape(EscapeInfo *info, UDF_ARGS *args)
{
  grn_ctx *ctx = &(info->ctx);
  char *query = args->args[0];
  unsigned int query_length = args->lengths[0];

  GRN_BULK_REWIND(&(info->escaped_query));
  if (args->arg_count == 2) {
    char *target_characters = args->args[1];
    unsigned int target_characters_length = args->lengths[1];
    GRN_TEXT_PUT(ctx, &(info->target_characters),
                 target_characters,
                 target_characters_length);
    GRN_TEXT_PUTC(ctx, &(info->target_characters), '\0');
    grn_expr_syntax_escape(ctx, query, query_length,
                           GRN_TEXT_VALUE(&(info->target_characters)),
                           GRN_QUERY_ESCAPE,
                           &(info->escaped_query));
  } else {
    grn_expr_syntax_escape_query(ctx, query, query_length,
                                 &(info->escaped_query));
  }
}
Example #20
0
void
grn_report_table(grn_ctx *ctx,
                 const char *action,
                 const char *tag,
                 grn_obj *table)
{
  grn_obj description;
  grn_obj *target;

  if (!grn_logger_pass(ctx, GRN_REPORT_INDEX_LOG_LEVEL)) {
    return;
  }

  GRN_TEXT_INIT(&description, 0);
  for (target = table; target; target = grn_ctx_at(ctx, target->header.domain)) {
    char name[GRN_TABLE_MAX_KEY_SIZE];
    int name_size;

    name_size = grn_obj_name(ctx, target, name, GRN_TABLE_MAX_KEY_SIZE);
    if (GRN_TEXT_LEN(&description) > 0) {
      GRN_TEXT_PUTS(ctx, &description, " -> ");
    }
    if (name_size == 0) {
      GRN_TEXT_PUTS(ctx, &description, "(temporary)");
    } else {
      GRN_TEXT_PUTS(ctx, &description, "<");
      GRN_TEXT_PUT(ctx, &description, name, name_size);
      GRN_TEXT_PUTS(ctx, &description, ">");
    }
  }
  GRN_LOG(ctx, GRN_REPORT_INDEX_LOG_LEVEL,
          "%s[table]%s %.*s",
          action, tag,
          (int)GRN_TEXT_LEN(&description),
          GRN_TEXT_VALUE(&description));
  GRN_OBJ_FIN(ctx, &description);
}
Example #21
0
static void
json_read(grn_ctx *ctx, grn_loader *loader, const char *str, unsigned int str_len)
{
  const char *const beg = str;
  char c;
  int len;
  const char *se = str + str_len;
  while (str < se) {
    c = *str;
    switch (loader->stat) {
    case GRN_LOADER_BEGIN :
      if ((len = grn_isspace(str, ctx->encoding))) {
        str += len;
        continue;
      }
      switch (c) {
      case '[' :
        JSON_READ_OPEN_BRACKET();
        break;
      case '{' :
        JSON_READ_OPEN_BRACE();
        break;
      default :
        ERR(GRN_INVALID_ARGUMENT,
            "JSON must start with '[' or '{': <%.*s>", str_len, beg);
        loader->stat = GRN_LOADER_END;
        break;
      }
      break;
    case GRN_LOADER_TOKEN :
      if ((len = grn_isspace(str, ctx->encoding))) {
        str += len;
        continue;
      }
      switch (c) {
      case '"' :
        loader->stat = GRN_LOADER_STRING;
        values_add(ctx, loader);
        str++;
        break;
      case '[' :
        JSON_READ_OPEN_BRACKET();
        break;
      case '{' :
        JSON_READ_OPEN_BRACE();
        break;
      case ':' :
        str++;
        break;
      case ',' :
        str++;
        break;
      case ']' :
        bracket_close(ctx, loader);
        loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
        if (ctx->rc == GRN_CANCEL) {
          loader->stat = GRN_LOADER_END;
        }
        str++;
        break;
      case '}' :
        brace_close(ctx, loader);
        loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
        if (ctx->rc == GRN_CANCEL) {
          loader->stat = GRN_LOADER_END;
        }
        str++;
        break;
      case '+' : case '-' : case '0' : case '1' : case '2' : case '3' :
      case '4' : case '5' : case '6' : case '7' : case '8' : case '9' :
        loader->stat = GRN_LOADER_NUMBER;
        values_add(ctx, loader);
        break;
      default :
        if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('_' == c)) {
          loader->stat = GRN_LOADER_SYMBOL;
          values_add(ctx, loader);
        } else {
          if ((len = grn_charlen(ctx, str, se))) {
            GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char('%c') at", c);
            GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg) + len, beg);
            GRN_LOG(ctx, GRN_LOG_ERROR, "%*s", (int)(str - beg) + 1, "^");
            str += len;
          } else {
            GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char(\\x%.2x) after", c);
            GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg), beg);
            str = se;
          }
        }
        break;
      }
      break;
    case GRN_LOADER_SYMBOL :
      if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') ||
          ('0' <= c && c <= '9') || ('_' == c)) {
        GRN_TEXT_PUTC(ctx, loader->last, c);
        str++;
      } else {
        char *v = GRN_TEXT_VALUE(loader->last);
        switch (*v) {
        case 'n' :
          if (GRN_TEXT_LEN(loader->last) == 4 && !memcmp(v, "null", 4)) {
            loader->last->header.domain = GRN_DB_VOID;
            GRN_BULK_REWIND(loader->last);
          }
          break;
        case 't' :
          if (GRN_TEXT_LEN(loader->last) == 4 && !memcmp(v, "true", 4)) {
            loader->last->header.domain = GRN_DB_BOOL;
            GRN_BOOL_SET(ctx, loader->last, GRN_TRUE);
          }
          break;
        case 'f' :
          if (GRN_TEXT_LEN(loader->last) == 5 && !memcmp(v, "false", 5)) {
            loader->last->header.domain = GRN_DB_BOOL;
            GRN_BOOL_SET(ctx, loader->last, GRN_FALSE);
          }
          break;
        default :
          break;
        }
        loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
      }
      break;
    case GRN_LOADER_NUMBER :
      switch (c) {
      case '+' : case '-' : case '.' : case 'e' : case 'E' :
      case '0' : case '1' : case '2' : case '3' : case '4' :
      case '5' : case '6' : case '7' : case '8' : case '9' :
        GRN_TEXT_PUTC(ctx, loader->last, c);
        str++;
        break;
      default :
        {
          const char *cur, *str = GRN_BULK_HEAD(loader->last);
          const char *str_end = GRN_BULK_CURR(loader->last);
          int64_t i = grn_atoll(str, str_end, &cur);
          if (cur == str_end) {
            loader->last->header.domain = GRN_DB_INT64;
            GRN_INT64_SET(ctx, loader->last, i);
          } else if (cur != str) {
            uint64_t i = grn_atoull(str, str_end, &cur);
            if (cur == str_end) {
              loader->last->header.domain = GRN_DB_UINT64;
              GRN_UINT64_SET(ctx, loader->last, i);
            } else if (cur != str) {
              double d;
              char *end;
              grn_obj buf;
              GRN_TEXT_INIT(&buf, 0);
              GRN_TEXT_PUT(ctx, &buf, str, GRN_BULK_VSIZE(loader->last));
              GRN_TEXT_PUTC(ctx, &buf, '\0');
              errno = 0;
              d = strtod(GRN_TEXT_VALUE(&buf), &end);
              if (!errno && end + 1 == GRN_BULK_CURR(&buf)) {
                loader->last->header.domain = GRN_DB_FLOAT;
                GRN_FLOAT_SET(ctx, loader->last, d);
              }
              GRN_OBJ_FIN(ctx, &buf);
            }
          }
        }
        loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
        break;
      }
      break;
    case GRN_LOADER_STRING :
      switch (c) {
      case '\\' :
        loader->stat = GRN_LOADER_STRING_ESC;
        str++;
        break;
      case '"' :
        str++;
        loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
        /*
        *(GRN_BULK_CURR(loader->last)) = '\0';
        GRN_LOG(ctx, GRN_LOG_ALERT, "read str(%s)", GRN_TEXT_VALUE(loader->last));
        */
        break;
      default :
        if ((len = grn_charlen(ctx, str, se))) {
          GRN_TEXT_PUT(ctx, loader->last, str, len);
          str += len;
        } else {
          GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char(\\x%.2x) after", c);
          GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg), beg);
          str = se;
        }
        break;
      }
      break;
    case GRN_LOADER_STRING_ESC :
      switch (c) {
      case 'b' :
        GRN_TEXT_PUTC(ctx, loader->last, '\b');
        loader->stat = GRN_LOADER_STRING;
        break;
      case 'f' :
        GRN_TEXT_PUTC(ctx, loader->last, '\f');
        loader->stat = GRN_LOADER_STRING;
        break;
      case 'n' :
        GRN_TEXT_PUTC(ctx, loader->last, '\n');
        loader->stat = GRN_LOADER_STRING;
        break;
      case 'r' :
        GRN_TEXT_PUTC(ctx, loader->last, '\r');
        loader->stat = GRN_LOADER_STRING;
        break;
      case 't' :
        GRN_TEXT_PUTC(ctx, loader->last, '\t');
        loader->stat = GRN_LOADER_STRING;
        break;
      case 'u' :
        loader->stat = GRN_LOADER_UNICODE0;
        break;
      default :
        GRN_TEXT_PUTC(ctx, loader->last, c);
        loader->stat = GRN_LOADER_STRING;
        break;
      }
      str++;
      break;
    case GRN_LOADER_UNICODE0 :
      switch (c) {
      case '0' : case '1' : case '2' : case '3' : case '4' :
      case '5' : case '6' : case '7' : case '8' : case '9' :
        loader->unichar = (c - '0') * 0x1000;
        break;
      case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' :
        loader->unichar = (c - 'a' + 10) * 0x1000;
        break;
      case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' :
        loader->unichar = (c - 'A' + 10) * 0x1000;
        break;
      default :
        ;// todo : error
      }
      loader->stat = GRN_LOADER_UNICODE1;
      str++;
      break;
    case GRN_LOADER_UNICODE1 :
      switch (c) {
      case '0' : case '1' : case '2' : case '3' : case '4' :
      case '5' : case '6' : case '7' : case '8' : case '9' :
        loader->unichar += (c - '0') * 0x100;
        break;
      case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' :
        loader->unichar += (c - 'a' + 10) * 0x100;
        break;
      case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' :
        loader->unichar += (c - 'A' + 10) * 0x100;
        break;
      default :
        ;// todo : error
      }
      loader->stat = GRN_LOADER_UNICODE2;
      str++;
      break;
    case GRN_LOADER_UNICODE2 :
      switch (c) {
      case '0' : case '1' : case '2' : case '3' : case '4' :
      case '5' : case '6' : case '7' : case '8' : case '9' :
        loader->unichar += (c - '0') * 0x10;
        break;
      case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' :
        loader->unichar += (c - 'a' + 10) * 0x10;
        break;
      case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' :
        loader->unichar += (c - 'A' + 10) * 0x10;
        break;
      default :
        ;// todo : error
      }
      loader->stat = GRN_LOADER_UNICODE3;
      str++;
      break;
    case GRN_LOADER_UNICODE3 :
      switch (c) {
      case '0' : case '1' : case '2' : case '3' : case '4' :
      case '5' : case '6' : case '7' : case '8' : case '9' :
        loader->unichar += (c - '0');
        break;
      case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' :
        loader->unichar += (c - 'a' + 10);
        break;
      case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' :
        loader->unichar += (c - 'A' + 10);
        break;
      default :
        ;// todo : error
      }
      {
        uint32_t u = loader->unichar;
        if (u >= 0xd800 && u <= 0xdbff) { /* High-surrogate code points */
          loader->unichar_hi = u;
          loader->stat = GRN_LOADER_STRING;
          str++;
          break;
        }
        if (u >= 0xdc00 && u <= 0xdfff) { /* Low-surrogate code points */
          u = 0x10000 + (loader->unichar_hi - 0xd800) * 0x400 + u - 0xdc00;
        }
        if (u < 0x80) {
          GRN_TEXT_PUTC(ctx, loader->last, u);
        } else {
          if (u < 0x800) {
            GRN_TEXT_PUTC(ctx, loader->last, (u >> 6) | 0xc0);
          } else {
            if (u < 0x10000) {
              GRN_TEXT_PUTC(ctx, loader->last, (u >> 12) | 0xe0);
            } else {
              GRN_TEXT_PUTC(ctx, loader->last, (u >> 18) | 0xf0);
              GRN_TEXT_PUTC(ctx, loader->last, ((u >> 12) & 0x3f) | 0x80);
            }
            GRN_TEXT_PUTC(ctx, loader->last, ((u >> 6) & 0x3f) | 0x80);
          }
          GRN_TEXT_PUTC(ctx, loader->last, (u & 0x3f) | 0x80);
        }
Example #22
0
void
grn_output_obj(grn_ctx *ctx, grn_obj *outbuf, grn_content_type output_type,
               grn_obj *obj, grn_obj_format *format)
{
  grn_obj buf;
  GRN_TEXT_INIT(&buf, 0);
  switch (obj->header.type) {
  case GRN_BULK :
    switch (obj->header.domain) {
    case GRN_DB_VOID :
      grn_output_void(ctx, outbuf, output_type, GRN_BULK_HEAD(obj), GRN_BULK_VSIZE(obj));
      break;
    case GRN_DB_SHORT_TEXT :
    case GRN_DB_TEXT :
    case GRN_DB_LONG_TEXT :
      grn_output_str(ctx, outbuf, output_type, GRN_BULK_HEAD(obj), GRN_BULK_VSIZE(obj));
      break;
    case GRN_DB_BOOL :
      grn_output_bool(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_UINT8_VALUE(obj) : 0);
      break;
    case GRN_DB_INT8 :
      grn_output_int32(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_INT8_VALUE(obj) : 0);
      break;
    case GRN_DB_UINT8 :
      grn_output_int32(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_UINT8_VALUE(obj) : 0);
      break;
    case GRN_DB_INT16 :
      grn_output_int32(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_INT16_VALUE(obj) : 0);
      break;
    case GRN_DB_UINT16 :
      grn_output_int32(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_UINT16_VALUE(obj) : 0);
      break;
    case GRN_DB_INT32 :
      grn_output_int32(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_INT32_VALUE(obj) : 0);
      break;
    case GRN_DB_UINT32 :
      grn_output_int64(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_UINT32_VALUE(obj) : 0);
      break;
    case GRN_DB_INT64 :
      grn_output_int64(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_INT64_VALUE(obj) : 0);
      break;
    case GRN_DB_UINT64 :
      grn_output_uint64(ctx, outbuf, output_type,
                        GRN_BULK_VSIZE(obj) ? GRN_UINT64_VALUE(obj) : 0);
      break;
    case GRN_DB_FLOAT :
      grn_output_float(ctx, outbuf, output_type,
                       GRN_BULK_VSIZE(obj) ? GRN_FLOAT_VALUE(obj) : 0);
      break;
    case GRN_DB_TIME :
      grn_output_time(ctx, outbuf, output_type,
                      GRN_BULK_VSIZE(obj) ? GRN_INT64_VALUE(obj) : 0);
      break;
    case GRN_DB_TOKYO_GEO_POINT :
    case GRN_DB_WGS84_GEO_POINT :
      grn_output_geo_point(ctx, outbuf, output_type,
                           GRN_BULK_VSIZE(obj) ? (grn_geo_point *)GRN_BULK_HEAD(obj) : NULL);
      break;
    default :
      if (format) {
        int j;
        int ncolumns = GRN_BULK_VSIZE(&format->columns)/sizeof(grn_obj *);
        grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
        if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
          grn_output_array_open(ctx, outbuf, output_type, "COLUMNS", ncolumns);
          for (j = 0; j < ncolumns; j++) {
            grn_id range_id;
            grn_output_array_open(ctx, outbuf, output_type, "COLUMN", 2);
            GRN_BULK_REWIND(&buf);
            grn_column_name_(ctx, columns[j], &buf);
            grn_output_obj(ctx, outbuf, output_type, &buf, NULL);
            /* column range */
            range_id = grn_obj_get_range(ctx, columns[j]);
            if (range_id == GRN_ID_NIL) {
              GRN_TEXT_PUTS(ctx, outbuf, "null");
            } else {
              int name_len;
              grn_obj *range_obj;
              char name_buf[GRN_TABLE_MAX_KEY_SIZE];

              range_obj = grn_ctx_at(ctx, range_id);
              name_len = grn_obj_name(ctx, range_obj, name_buf,
                                      GRN_TABLE_MAX_KEY_SIZE);
              GRN_BULK_REWIND(&buf);
              GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
              grn_output_obj(ctx, outbuf, output_type, &buf, NULL);
            }
            grn_output_array_close(ctx, outbuf, output_type);
          }
          grn_output_array_close(ctx, outbuf, output_type);
        }
        grn_output_array_open(ctx, outbuf, output_type, "HIT", ncolumns);
        for (j = 0; j < ncolumns; j++) {
          grn_text_atoj_o(ctx, outbuf, output_type, columns[j], obj);
        }
        grn_output_array_close(ctx, outbuf, output_type);
      } else {
        grn_obj *table = grn_ctx_at(ctx, obj->header.domain);
        grn_id id = *((grn_id *)GRN_BULK_HEAD(obj));
        if (table && table->header.type != GRN_TABLE_NO_KEY) {
          grn_obj *accessor = grn_obj_column(ctx, table, "_key", 4);
          if (accessor) {
            grn_obj_get_value(ctx, accessor, id, &buf);
            grn_obj_unlink(ctx, accessor);
          }
          grn_output_obj(ctx, outbuf, output_type, &buf, format);
        } else {
          grn_output_int64(ctx, outbuf, output_type, id);
        }
      }
      break;
    }
    break;
  case GRN_UVECTOR :
    if (format) {
      int i, j;
      grn_id *v = (grn_id *)GRN_BULK_HEAD(obj), *ve = (grn_id *)GRN_BULK_CURR(obj);
      int ncolumns = GRN_BULK_VSIZE(&format->columns) / sizeof(grn_obj *);
      grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
      grn_output_array_open(ctx, outbuf, output_type, "RESULTSET", -1);
      grn_output_array_open(ctx, outbuf, output_type, "NHITS", 1);
      grn_text_itoa(ctx, outbuf, ve - v);
      grn_output_array_close(ctx, outbuf, output_type);
      if (v < ve) {
        if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
          grn_output_array_open(ctx, outbuf, output_type, "COLUMNS", -1);
          for (j = 0; j < ncolumns; j++) {
            grn_id range_id;
            grn_output_array_open(ctx, outbuf, output_type, "COLUMN", -1);
            GRN_BULK_REWIND(&buf);
            grn_column_name_(ctx, columns[j], &buf);
            grn_output_obj(ctx, outbuf, output_type, &buf, NULL);
            /* column range */
            range_id = grn_obj_get_range(ctx, columns[j]);
            if (range_id == GRN_ID_NIL) {
              GRN_TEXT_PUTS(ctx, outbuf, "null");
            } else {
              int name_len;
              grn_obj *range_obj;
              char name_buf[GRN_TABLE_MAX_KEY_SIZE];

              range_obj = grn_ctx_at(ctx, range_id);
              name_len = grn_obj_name(ctx, range_obj, name_buf,
                                      GRN_TABLE_MAX_KEY_SIZE);
              GRN_BULK_REWIND(&buf);
              GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
              grn_output_obj(ctx, outbuf, output_type, &buf, NULL);
            }
            grn_output_array_close(ctx, outbuf, output_type);
          }
          grn_output_array_close(ctx, outbuf, output_type);
        }
        for (i = 0;; i++) {
          grn_output_array_open(ctx, outbuf, output_type, "HITS", -1);
          for (j = 0; j < ncolumns; j++) {
            GRN_BULK_REWIND(&buf);
            grn_obj_get_value(ctx, columns[j], *v, &buf);
            grn_output_obj(ctx, outbuf, output_type, &buf, NULL);
          }
          grn_output_array_close(ctx, outbuf, output_type);
          v++;
          if (v < ve) {

          } else {
            break;
          }
        }
      }
      grn_output_array_close(ctx, outbuf, output_type);
    } else {
      grn_obj *range = grn_ctx_at(ctx, obj->header.domain);
      if (range && range->header.type == GRN_TYPE) {
        int value_size = ((struct _grn_type *)range)->obj.range;
        char *v = (char *)GRN_BULK_HEAD(obj),
             *ve = (char *)GRN_BULK_CURR(obj);
        grn_output_array_open(ctx, outbuf, output_type, "VECTOR", -1);
        if (v < ve) {
          for (;;) {
            grn_obj value;
            GRN_OBJ_INIT(&value, GRN_BULK, 0, obj->header.domain);
            grn_bulk_write_from(ctx, &value, v, 0, value_size);
            grn_output_obj(ctx, outbuf, output_type, &value, NULL);

            v += value_size;
            if (v < ve) {

            } else {
              break;
            }
          }
        }
        grn_output_array_close(ctx, outbuf, output_type);
      } else {
        grn_id *v = (grn_id *)GRN_BULK_HEAD(obj),
               *ve = (grn_id *)GRN_BULK_CURR(obj);
        grn_output_array_open(ctx, outbuf, output_type, "VECTOR", ve - v);
        if (v < ve) {
          grn_obj key;
          GRN_OBJ_INIT(&key, GRN_BULK, 0, range->header.domain);
          for (;;) {
            if (range->header.type != GRN_TABLE_NO_KEY) {
              grn_table_get_key2(ctx, range, *v, &key);
              grn_output_obj(ctx, outbuf, output_type, &key, NULL);
              GRN_BULK_REWIND(&key);
            } else {
              grn_obj id;
              GRN_UINT32_INIT(&id, 0);
              GRN_UINT32_SET(ctx, &id, *v);
              grn_output_obj(ctx, outbuf, output_type, &id, NULL);
              GRN_OBJ_FIN(ctx, &id);
            }
            v++;
            if (v < ve) {

            } else {
              break;
            }
          }
          GRN_OBJ_FIN(ctx, &key);
        }
        grn_output_array_close(ctx, outbuf, output_type);
      }
    }
    break;
  case GRN_VECTOR :
    if (obj->header.domain == GRN_DB_VOID) {
      ERR(GRN_INVALID_ARGUMENT, "invalid obj->header.domain");
    }
    if (format) {
      ERR(GRN_FUNCTION_NOT_IMPLEMENTED,
          "cannot print GRN_VECTOR using grn_obj_format");
    } else {
      unsigned int i, n;
      grn_obj value;
      GRN_VOID_INIT(&value);
      n = grn_vector_size(ctx, obj);
      grn_output_array_open(ctx, outbuf, output_type, "VECTOR", -1);
      for (i = 0; i < n; i++) {
        const char *_value;
        unsigned int weight, length;
        grn_id domain;

        length = grn_vector_get_element(ctx, obj, i,
                                        &_value, &weight, &domain);
        if (domain != GRN_DB_VOID) {
          grn_obj_reinit(ctx, &value, domain, 0);
        } else {
          grn_obj_reinit(ctx, &value, obj->header.domain, 0);
        }
        grn_bulk_write(ctx, &value, _value, length);
        grn_output_obj(ctx, outbuf, output_type, &value, NULL);
      }
      grn_output_array_close(ctx, outbuf, output_type);
      GRN_OBJ_FIN(ctx, &value);
    }
    break;
  case GRN_PVECTOR :
    if (format) {
      ERR(GRN_FUNCTION_NOT_IMPLEMENTED,
          "cannot print GRN_PVECTOR using grn_obj_format");
    } else {
      unsigned int i, n;
      grn_output_array_open(ctx, outbuf, output_type, "VECTOR", -1);
      n = GRN_BULK_VSIZE(obj) / sizeof(grn_obj *);
      for (i = 0; i < n; i++) {
        grn_obj *value;

        value = GRN_PTR_VALUE_AT(obj, i);
        grn_output_obj(ctx, outbuf, output_type, value, NULL);
      }
      grn_output_array_close(ctx, outbuf, output_type);
    }
    break;
  case GRN_TABLE_HASH_KEY :
  case GRN_TABLE_PAT_KEY :
  case GRN_TABLE_NO_KEY :
  case GRN_TABLE_VIEW :
    if (format) {
      int i, j;
      int ncolumns = GRN_BULK_VSIZE(&format->columns)/sizeof(grn_obj *);
      grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
      grn_table_cursor *tc = grn_table_cursor_open(ctx, obj, NULL, 0, NULL, 0,
                                                   format->offset, format->limit,
                                                   GRN_CURSOR_ASCENDING);
      int resultset_size = -1;
      if (!tc) { ERRCLR(ctx); }
#ifdef HAVE_MESSAGE_PACK
      resultset_size = 1; /* [NHITS, (COLUMNS), (HITS)] */
      if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
        resultset_size++;
      }
      resultset_size += format->limit;
#endif
      grn_output_array_open(ctx, outbuf, output_type, "RESULTSET", resultset_size);
      grn_output_array_open(ctx, outbuf, output_type, "NHITS", 1);
      if (output_type == GRN_CONTENT_XML) {
        grn_text_itoa(ctx, outbuf, format->nhits);
      } else {
        grn_output_int32(ctx, outbuf, output_type, format->nhits);
      }
      grn_output_array_close(ctx, outbuf, output_type);
      if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
        grn_output_array_open(ctx, outbuf, output_type, "COLUMNS", ncolumns);
        for (j = 0; j < ncolumns; j++) {
          grn_id range_id;
          grn_output_array_open(ctx, outbuf, output_type, "COLUMN", 2);
          GRN_BULK_REWIND(&buf);
          grn_column_name_(ctx, columns[j], &buf);
          grn_output_obj(ctx, outbuf, output_type, &buf, NULL);
          /* column range */
          range_id = grn_obj_get_range(ctx, columns[j]);
          if (range_id == GRN_ID_NIL) {
            GRN_TEXT_PUTS(ctx, outbuf, "null");
          } else {
            int name_len;
            grn_obj *range_obj;
            char name_buf[GRN_TABLE_MAX_KEY_SIZE];

            range_obj = grn_ctx_at(ctx, range_id);
            name_len = grn_obj_name(ctx, range_obj, name_buf,
                                    GRN_TABLE_MAX_KEY_SIZE);
            GRN_BULK_REWIND(&buf);
            GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
            grn_output_obj(ctx, outbuf, output_type, &buf, NULL);
          }
          grn_output_array_close(ctx, outbuf, output_type);
        }
        grn_output_array_close(ctx, outbuf, output_type);
      }
      if (tc) {
        grn_obj id;
        GRN_TEXT_INIT(&id, 0);
        for (i = 0; !grn_table_cursor_next_o(ctx, tc, &id); i++) {
          grn_output_array_open(ctx, outbuf, output_type, "HIT", ncolumns);
          for (j = 0; j < ncolumns; j++) {
            grn_text_atoj_o(ctx, outbuf, output_type, columns[j], &id);
          }
          grn_output_array_close(ctx, outbuf, output_type);
        }
        GRN_OBJ_FIN(ctx, &id);
        grn_table_cursor_close(ctx, tc);
      }
      grn_output_array_close(ctx, outbuf, output_type);
    } else {
      int i;
      grn_obj *column = grn_obj_column(ctx, obj, "_key", 4);
      grn_table_cursor *tc = grn_table_cursor_open(ctx, obj, NULL, 0, NULL, 0,
                                                   0, -1, GRN_CURSOR_ASCENDING);
      grn_output_array_open(ctx, outbuf, output_type, "HIT", -1);
      if (tc) {
        grn_obj id;
        GRN_TEXT_INIT(&id, 0);
        for (i = 0; !grn_table_cursor_next_o(ctx, tc, &id); i++) {
          /* todo:
          grn_text_atoj_o(ctx, outbuf, output_type, column, &id);
          */
          GRN_BULK_REWIND(&buf);
          grn_obj_get_value_o(ctx, column, &id, &buf);
          grn_text_esc(ctx, outbuf, GRN_BULK_HEAD(&buf), GRN_BULK_VSIZE(&buf));
        }
        GRN_OBJ_FIN(ctx, &id);
        grn_table_cursor_close(ctx, tc);
      }
      grn_output_array_close(ctx, outbuf, output_type);
      grn_obj_unlink(ctx, column);
    }
    break;
  }
  GRN_OBJ_FIN(ctx, &buf);
}
Example #23
0
static grn_obj *
regexp_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  int char_len;
  grn_token_status status = 0;
  grn_regexp_tokenizer *tokenizer = user_data->ptr;
  unsigned int n_characters = 0;
  int ngram_unit = 2;
  grn_obj *buffer = &(tokenizer->buffer);
  const char *current = tokenizer->next;
  const char *end = tokenizer->end;
  const const uint_least8_t *char_types = tokenizer->char_types;
  grn_tokenize_mode mode = tokenizer->query->tokenize_mode;
  grn_bool is_begin = tokenizer->is_begin;
  grn_bool is_start_token = tokenizer->is_start_token;
  grn_bool break_by_blank = GRN_FALSE;
  grn_bool break_by_end_mark = GRN_FALSE;

  GRN_BULK_REWIND(buffer);
  tokenizer->is_begin = GRN_FALSE;
  tokenizer->is_start_token = GRN_FALSE;

  if (char_types) {
    char_types += tokenizer->nth_char;
  }

  if (mode != GRN_TOKEN_GET) {
    if (is_begin) {
      grn_tokenizer_token_push(ctx,
                               &(tokenizer->token),
                               GRN_TOKENIZER_BEGIN_MARK_UTF8,
                               GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN,
                               status);
      return NULL;
    }

    if (tokenizer->is_end) {
      status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
      grn_tokenizer_token_push(ctx,
                               &(tokenizer->token),
                               GRN_TOKENIZER_END_MARK_UTF8,
                               GRN_TOKENIZER_END_MARK_UTF8_LEN,
                               status);
      return NULL;
    }
    if (is_start_token) {
      if (char_types && GRN_STR_ISBLANK(char_types[-1])) {
        status |= GRN_TOKEN_SKIP;
        grn_tokenizer_token_push(ctx, &(tokenizer->token), "", 0, status);
        return NULL;
      }
    }
  }

  char_len = grn_charlen_(ctx, current, end, tokenizer->query->encoding);
  if (char_len == 0) {
    status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
    grn_tokenizer_token_push(ctx, &(tokenizer->token), "", 0, status);
    return NULL;
  }

  if (mode == GRN_TOKEN_GET) {
    if (is_begin &&
        char_len == GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN &&
        memcmp(current, GRN_TOKENIZER_BEGIN_MARK_UTF8, char_len) == 0) {
      n_characters++;
      GRN_TEXT_PUT(ctx, buffer, current, char_len);
      current += char_len;
      tokenizer->next = current;
      tokenizer->nth_char++;
      if (current == end) {
        status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
      }
      grn_tokenizer_token_push(ctx,
                               &(tokenizer->token),
                               GRN_TOKENIZER_BEGIN_MARK_UTF8,
                               GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN,
                               status);
      return NULL;
    }

    if (current + char_len == end &&
        char_len == GRN_TOKENIZER_END_MARK_UTF8_LEN &&
        memcmp(current, GRN_TOKENIZER_END_MARK_UTF8, char_len) == 0) {
      status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
      grn_tokenizer_token_push(ctx,
                               &(tokenizer->token),
                               GRN_TOKENIZER_END_MARK_UTF8,
                               GRN_TOKENIZER_END_MARK_UTF8_LEN,
                               status);
      return NULL;
    }
  }

  while (GRN_TRUE) {
    n_characters++;
    GRN_TEXT_PUT(ctx, buffer, current, char_len);
    current += char_len;
    if (n_characters == 1) {
      tokenizer->next = current;
      tokenizer->nth_char++;
    }

    if (char_types) {
      uint_least8_t char_type;
      char_type = char_types[0];
      char_types++;
      if (GRN_STR_ISBLANK(char_type)) {
        break_by_blank = GRN_TRUE;
      }
    }

    char_len = grn_charlen_(ctx, (const char *)current, (const char *)end,
                            tokenizer->query->encoding);
    if (char_len == 0) {
      break;
    }

    if (mode == GRN_TOKEN_GET &&
        current + char_len == end &&
        char_len == GRN_TOKENIZER_END_MARK_UTF8_LEN &&
        memcmp(current, GRN_TOKENIZER_END_MARK_UTF8, char_len) == 0) {
      break_by_end_mark = GRN_TRUE;
    }

    if (break_by_blank || break_by_end_mark) {
      break;
    }

    if (n_characters == ngram_unit) {
      break;
    }
  }

  if (tokenizer->is_overlapping) {
    status |= GRN_TOKEN_OVERLAP;
  }
  if (n_characters < ngram_unit) {
    status |= GRN_TOKEN_UNMATURED;
  }
  tokenizer->is_overlapping = (n_characters > 1);

  if (mode == GRN_TOKEN_GET) {
    if (current == end) {
      tokenizer->is_end = GRN_TRUE;
      status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
      if (status & GRN_TOKEN_UNMATURED) {
        status |= GRN_TOKEN_FORCE_PREFIX;
      }
    } else {
      if (break_by_blank) {
        tokenizer->get.n_skip_tokens = 0;
        tokenizer->is_start_token = GRN_TRUE;
      } else if (break_by_end_mark) {
        if (!is_start_token && (status & GRN_TOKEN_UNMATURED)) {
          status |= GRN_TOKEN_SKIP;
        }
      } else if (tokenizer->get.n_skip_tokens > 0) {
        tokenizer->get.n_skip_tokens--;
        status |= GRN_TOKEN_SKIP;
      } else {
        tokenizer->get.n_skip_tokens = ngram_unit - 1;
      }
    }
  } else {
    if (tokenizer->next == end) {
      tokenizer->is_end = GRN_TRUE;
    }
    if (break_by_blank) {
      tokenizer->is_start_token = GRN_TRUE;
    }
  }

  grn_tokenizer_token_push(ctx,
                           &(tokenizer->token),
                           GRN_TEXT_VALUE(buffer),
                           GRN_TEXT_LEN(buffer),
                           status);

  return NULL;
}
Example #24
0
MRN_API char *mroonga_snippet_html(UDF_INIT *init,
                                   UDF_ARGS *args,
                                   char *result,
                                   unsigned long *length,
                                   char *is_null,
                                   char *error)
{
  MRN_DBUG_ENTER_FUNCTION();

  mrn_snippet_html_info *info =
    reinterpret_cast<mrn_snippet_html_info *>(init->ptr);

  grn_ctx *ctx = info->ctx;
  grn_obj *snippet = info->snippet;
  grn_obj *result_buffer = &(info->result);

  if (!args->args[0]) {
    *is_null = 1;
    DBUG_RETURN(NULL);
  }

  if (!snippet) {
    if (mrn_snippet_html_prepare(info, args, NULL, &snippet)) {
      goto error;
    }
  }

  {
    char *target = args->args[0];
    unsigned int target_length = args->lengths[0];

    unsigned int n_results, max_tagged_length;
    {
      grn_rc rc = grn_snip_exec(ctx, snippet, target, target_length,
                                &n_results, &max_tagged_length);
      if (rc != GRN_SUCCESS) {
        my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM,
                        ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf);
        goto error;
      }
    }

    *is_null = 0;
    GRN_BULK_REWIND(result_buffer);

    {
      const char *start_tag = "<div class=\"snippet\">";
      const char *end_tag = "</div>";
      size_t start_tag_length = strlen(start_tag);
      size_t end_tag_length = strlen(end_tag);
      for (unsigned int i = 0; i < n_results; ++i) {
        GRN_TEXT_PUT(ctx, result_buffer, start_tag, start_tag_length);

        grn_bulk_reserve(ctx, result_buffer, max_tagged_length);
        unsigned int result_length;
        grn_rc rc =
          grn_snip_get_result(ctx, snippet, i,
                              GRN_BULK_CURR(result_buffer),
                              &result_length);
        if (rc) {
          my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM,
                          ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf);
          goto error;
        }
        grn_bulk_space(ctx, result_buffer, result_length);

        GRN_TEXT_PUT(ctx, result_buffer, end_tag, end_tag_length);
      }
    }

    if (!info->snippet) {
      grn_rc rc = grn_obj_close(ctx, snippet);
      if (rc != GRN_SUCCESS) {
        my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM,
                        ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf);
        goto error;
      }
    }
  }

  *length = GRN_TEXT_LEN(result_buffer);
  DBUG_RETURN(GRN_TEXT_VALUE(result_buffer));

error:
  if (!info->snippet && snippet) {
    grn_obj_close(ctx, snippet);
  }

  *is_null = 1;
  *error = 1;

  DBUG_RETURN(NULL);
}
Example #25
0
void
test_setoperation(gconstpointer data)
{
  grn_operator operator;
  grn_obj *entries;
  grn_obj *result1;
  grn_obj *result2;
  const char *dump;

  operator = gcut_data_get_int(data, "operator");

  assert_send_command("table_create Entries TABLE_HASH_KEY ShortText");
  send_command(
    "load "
    "--table Entries "
    "--values '[{\"_key\": \"a\"}, {\"_key\": \"b\"}, {\"_key\": \"c\"}]'");

  entries = grn_ctx_get(context, "Entries", -1);
  {
    const char *condition = "_id < 3";
    grn_obj *expr;
    grn_obj *variable;

    GRN_EXPR_CREATE_FOR_QUERY(context, entries, expr, variable);
    grn_expr_parse(context, expr,
                   condition, strlen(condition),
                   NULL, GRN_OP_AND, GRN_OP_MATCH, GRN_EXPR_SYNTAX_SCRIPT);
    result1 = grn_table_select(context, entries, expr, NULL, GRN_OP_OR);
    grn_obj_unlink(context, expr);
  }
  {
    const char *condition = "_id > 1";
    grn_obj *expr;
    grn_obj *variable;

    GRN_EXPR_CREATE_FOR_QUERY(context, entries, expr, variable);
    grn_expr_parse(context, expr,
                   condition, strlen(condition),
                   NULL, GRN_OP_AND, GRN_OP_MATCH, GRN_EXPR_SYNTAX_SCRIPT);
    result2 = grn_table_select(context, entries, expr, NULL, GRN_OP_OR);
    grn_obj_unlink(context, expr);
  }

  grn_table_setoperation(context, result1, result2, result1, operator);

  {
    grn_bool first_record = GRN_TRUE;
    grn_obj buffer;
    grn_obj *score_accessor;
    grn_obj score;

    GRN_TEXT_INIT(&buffer, 0);
    GRN_TEXT_PUTS(context, &buffer, "[");
    score_accessor = grn_obj_column(context, result1,
                                    GRN_COLUMN_NAME_SCORE,
                                    GRN_COLUMN_NAME_SCORE_LEN);
    GRN_FLOAT_INIT(&score, 0);
    GRN_TABLE_EACH_BEGIN(context, result1, cursor, id) {
      void *result_key;
      grn_id entry_id;
      char entry_key[GRN_TABLE_MAX_KEY_SIZE];
      int entry_key_size;

      if (first_record) {
        first_record = GRN_FALSE;
      } else {
        GRN_TEXT_PUTS(context, &buffer, ", ");
      }

      GRN_TEXT_PUTS(context, &buffer, "[");

      grn_table_cursor_get_key(context, cursor, &result_key);
      entry_id = *((grn_id *)result_key);
      entry_key_size = grn_table_get_key(context,
                                         entries,
                                         entry_id,
                                         entry_key,
                                         GRN_TABLE_MAX_KEY_SIZE);
      GRN_TEXT_PUT(context, &buffer, entry_key, entry_key_size);

      GRN_TEXT_PUTS(context, &buffer, ", ");

      GRN_BULK_REWIND(&score);
      grn_obj_get_value(context, score_accessor, id, &score);
      grn_text_printf(context, &buffer, "%.1f", GRN_FLOAT_VALUE(&score));

      GRN_TEXT_PUTS(context, &buffer, "]");
    } GRN_TABLE_EACH_END(context, cursor);
    GRN_OBJ_FIN(context, &score);
    grn_obj_unlink(context, score_accessor);
    GRN_TEXT_PUTS(context, &buffer, "]");

    dump = cut_take_strndup(GRN_TEXT_VALUE(&buffer), GRN_TEXT_LEN(&buffer));
    GRN_OBJ_FIN(context, &buffer);
  }
Example #26
0
/* grn_ts_writer_output_header() outputs names and data types. */
static grn_rc
grn_ts_writer_output_header(grn_ctx *ctx, grn_ts_writer *writer)
{
  grn_rc rc;
  GRN_OUTPUT_ARRAY_OPEN("COLUMNS", writer->n_exprs);
  for (size_t i = 0; i < writer->n_exprs; ++i) {
    GRN_OUTPUT_ARRAY_OPEN("COLUMN", 2);
    rc = grn_text_esc(ctx, ctx->impl->output.buf,
                      writer->names[i].ptr, writer->names[i].size);
    if (rc != GRN_SUCCESS) {
      return rc;
    }
    GRN_TEXT_PUT(ctx, ctx->impl->output.buf, ",\"", 2);
    switch (writer->exprs[i]->data_type) {
      case GRN_DB_VOID: {
        if (writer->exprs[i]->data_kind == GRN_TS_GEO) {
          GRN_TEXT_PUTS(ctx, ctx->impl->output.buf, "GeoPoint");
        } else {
          GRN_TEXT_PUTS(ctx, ctx->impl->output.buf, "Void");
        }
        break;
      }
      GRN_TS_WRITER_OUTPUT_HEADER_CASE(BOOL, "Bool")
      GRN_TS_WRITER_OUTPUT_HEADER_CASE(INT8, "Int8")
      GRN_TS_WRITER_OUTPUT_HEADER_CASE(INT16, "Int16")
      GRN_TS_WRITER_OUTPUT_HEADER_CASE(INT32, "Int32")
      GRN_TS_WRITER_OUTPUT_HEADER_CASE(INT64, "Int64")
      GRN_TS_WRITER_OUTPUT_HEADER_CASE(UINT8, "UInt8")
      GRN_TS_WRITER_OUTPUT_HEADER_CASE(UINT16, "UInt16")
      GRN_TS_WRITER_OUTPUT_HEADER_CASE(UINT32, "UInt32")
      GRN_TS_WRITER_OUTPUT_HEADER_CASE(UINT64, "UInt64")
      GRN_TS_WRITER_OUTPUT_HEADER_CASE(FLOAT, "Float")
      GRN_TS_WRITER_OUTPUT_HEADER_CASE(TIME, "Time")
      GRN_TS_WRITER_OUTPUT_HEADER_CASE(SHORT_TEXT, "ShortText")
      GRN_TS_WRITER_OUTPUT_HEADER_CASE(TEXT, "Text")
      GRN_TS_WRITER_OUTPUT_HEADER_CASE(LONG_TEXT, "LongText")
      GRN_TS_WRITER_OUTPUT_HEADER_CASE(TOKYO_GEO_POINT, "TokyoGeoPoint")
      GRN_TS_WRITER_OUTPUT_HEADER_CASE(WGS84_GEO_POINT, "WGS84GeoPoint")
      default: {
        char name_buf[GRN_TABLE_MAX_KEY_SIZE];
        size_t name_size;
        grn_obj *obj = grn_ctx_at(ctx, writer->exprs[i]->data_type);
        if (!obj) {
          GRN_TS_ERR_RETURN(GRN_UNKNOWN_ERROR, "grn_ctx_at failed: %d",
                            writer->exprs[i]->data_type);
        }
        if (!grn_ts_obj_is_table(ctx, obj)) {
          grn_obj_unlink(ctx, obj);
          GRN_TS_ERR_RETURN(GRN_UNKNOWN_ERROR, "not table: %d",
                            writer->exprs[i]->data_type);
        }
        name_size = grn_obj_name(ctx, obj, name_buf, sizeof(name_buf));
        GRN_TEXT_PUT(ctx, ctx->impl->output.buf, name_buf, name_size);
        grn_obj_unlink(ctx, obj);
        break;
      }
    }
    GRN_TEXT_PUTC(ctx, ctx->impl->output.buf, '"');
    GRN_OUTPUT_ARRAY_CLOSE();
  }
  GRN_OUTPUT_ARRAY_CLOSE(); /* COLUMNS. */
  return GRN_SUCCESS;
}
Example #27
0
static void
command_schema_column_command_collect_arguments(grn_ctx *ctx,
                                             grn_obj *table,
                                             grn_obj *column,
                                             grn_obj *arguments)
{
#define ADD(name_, value_)                              \
  grn_vector_add_element(ctx, arguments,                \
                         name_, strlen(name_),          \
                         0, GRN_DB_TEXT);               \
  grn_vector_add_element(ctx, arguments,                \
                         value_, strlen(value_),        \
                         0, GRN_DB_TEXT)

#define ADD_OBJECT_NAME(name_, object_) do {                    \
    char object_name[GRN_TABLE_MAX_KEY_SIZE];                   \
    unsigned int object_name_size;                              \
    object_name_size = grn_obj_name(ctx, object_,               \
                                    object_name,                \
                                    GRN_TABLE_MAX_KEY_SIZE);    \
    object_name[object_name_size] = '\0';                       \
    ADD(name_, object_name);                                    \
  } while (GRN_FALSE)

  ADD_OBJECT_NAME("table", table);
  {
    char column_name[GRN_TABLE_MAX_KEY_SIZE];
    unsigned int column_name_size;
    column_name_size = grn_column_name(ctx, column,
                                       column_name, GRN_TABLE_MAX_KEY_SIZE);
    column_name[column_name_size] = '\0';
    ADD("name", column_name);
  }

  {
    grn_obj flags;
    GRN_TEXT_INIT(&flags, 0);
    grn_dump_column_create_flags(ctx,
                                 column->header.flags & ~GRN_OBJ_PERSISTENT,
                                 &flags);
    GRN_TEXT_PUTC(ctx, &flags, '\0');
    ADD("flags", GRN_TEXT_VALUE(&flags));
    GRN_OBJ_FIN(ctx, &flags);
  }

  {
    grn_obj *value_type;

    value_type = grn_ctx_at(ctx, grn_obj_get_range(ctx, column));
    ADD_OBJECT_NAME("type", value_type);
  }

  if (column->header.type == GRN_COLUMN_INDEX) {
    grn_obj source_ids;
    unsigned int n_ids;

    GRN_RECORD_INIT(&source_ids, GRN_OBJ_VECTOR, GRN_ID_NIL);
    grn_obj_get_info(ctx, column, GRN_INFO_SOURCE, &source_ids);

    n_ids = GRN_BULK_VSIZE(&source_ids) / sizeof(grn_id);
    if (n_ids > 0) {
      grn_obj sources;
      unsigned int i;

      GRN_TEXT_INIT(&sources, 0);
      for (i = 0; i < n_ids; i++) {
        grn_id source_id;
        grn_obj *source;
        char name[GRN_TABLE_MAX_KEY_SIZE];
        unsigned int name_size;

        source_id = GRN_RECORD_VALUE_AT(&source_ids, i);
        source = grn_ctx_at(ctx, source_id);

        if (grn_obj_is_table(ctx, source)) {
          grn_strcpy(name, GRN_TABLE_MAX_KEY_SIZE, "_key");
          name_size = strlen(name);
        } else {
          name_size = grn_column_name(ctx, source, name, GRN_TABLE_MAX_KEY_SIZE);
        }
        if (i > 0) {
          GRN_TEXT_PUTC(ctx, &sources, ',');
        }
        GRN_TEXT_PUT(ctx, &sources, name, name_size);
      }
      GRN_TEXT_PUTC(ctx, &sources, '\0');
      ADD("sources", GRN_TEXT_VALUE(&sources));
      GRN_OBJ_FIN(ctx, &sources);
    }
    GRN_OBJ_FIN(ctx, &source_ids);
  }

#undef ADD_OBJECT_NAME
#undef ADD
}
Example #28
0
static grn_obj *
regexp_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  int char_len;
  grn_token_status status = 0;
  grn_regexp_tokenizer *tokenizer = user_data->ptr;
  unsigned int n_characters = 0;
  int ngram_unit = 2;
  grn_obj *buffer = &(tokenizer->buffer);
  const char *current = tokenizer->next;
  const char *end = tokenizer->end;
  grn_tokenize_mode mode = tokenizer->query->tokenize_mode;
  grn_bool escaping = GRN_FALSE;

  GRN_BULK_REWIND(buffer);

  if (mode == GRN_TOKEN_GET) {
    if (tokenizer->get.have_begin) {
      grn_tokenizer_token_push(ctx,
                               &(tokenizer->token),
                               GRN_TOKENIZER_BEGIN_MARK_UTF8,
                               GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN,
                               status);
      tokenizer->get.have_begin = GRN_FALSE;
      return NULL;
    }

    if (tokenizer->is_end && tokenizer->get.have_end) {
      status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
      grn_tokenizer_token_push(ctx,
                               &(tokenizer->token),
                               GRN_TOKENIZER_END_MARK_UTF8,
                               GRN_TOKENIZER_END_MARK_UTF8_LEN,
                               status);
      return NULL;
    }
  } else {
    if (tokenizer->is_begin) {
      grn_tokenizer_token_push(ctx,
                               &(tokenizer->token),
                               GRN_TOKENIZER_BEGIN_MARK_UTF8,
                               GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN,
                               status);
      tokenizer->is_begin = GRN_FALSE;
      return NULL;
    }

    if (tokenizer->is_end) {
      status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
      grn_tokenizer_token_push(ctx,
                               &(tokenizer->token),
                               GRN_TOKENIZER_END_MARK_UTF8,
                               GRN_TOKENIZER_END_MARK_UTF8_LEN,
                               status);
      return NULL;
    }
  }

  char_len = grn_charlen_(ctx, current, end, tokenizer->query->encoding);
  if (char_len == 0) {
    status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
    grn_tokenizer_token_push(ctx, &(tokenizer->token), "", 0, status);
    return NULL;
  }

  while (GRN_TRUE) {
    if (!escaping && mode == GRN_TOKEN_GET &&
        char_len == 1 && current[0] == '\\') {
      current += char_len;
      escaping = GRN_TRUE;
    } else {
      n_characters++;
      GRN_TEXT_PUT(ctx, buffer, current, char_len);
      current += char_len;
      escaping = GRN_FALSE;
      if (n_characters == 1) {
        tokenizer->next = current;
      }
      if (n_characters == ngram_unit) {
        break;
      }
    }

    char_len = grn_charlen_(ctx, (const char *)current, (const char *)end,
                            tokenizer->query->encoding);
    if (char_len == 0) {
      break;
    }
  }

  if (tokenizer->is_overlapping) {
    status |= GRN_TOKEN_OVERLAP;
  }
  if (n_characters < ngram_unit) {
    status |= GRN_TOKEN_UNMATURED;
  }
  tokenizer->is_overlapping = (n_characters > 1);

  if (mode == GRN_TOKEN_GET) {
    if ((end - tokenizer->next) < ngram_unit) {
      if (tokenizer->get.have_end) {
        if (tokenizer->next == end) {
          tokenizer->is_end = GRN_TRUE;
        }
        if (status & GRN_TOKEN_UNMATURED) {
          if (tokenizer->is_first_token) {
            status |= GRN_TOKEN_FORCE_PREFIX;
          } else {
            status |= GRN_TOKEN_SKIP;
          }
        }
      } else {
        tokenizer->is_end = GRN_TRUE;
        status |= GRN_TOKEN_LAST | GRN_TOKEN_REACH_END;
        if (status & GRN_TOKEN_UNMATURED) {
          status |= GRN_TOKEN_FORCE_PREFIX;
        }
      }
    } else {
      if (tokenizer->get.n_skip_tokens > 0) {
        tokenizer->get.n_skip_tokens--;
        status |= GRN_TOKEN_SKIP;
      } else {
        tokenizer->get.n_skip_tokens = ngram_unit - 1;
      }
    }
  } else {
    if (tokenizer->next == end) {
      tokenizer->is_end = GRN_TRUE;
    }
  }

  grn_tokenizer_token_push(ctx,
                           &(tokenizer->token),
                           GRN_TEXT_VALUE(buffer),
                           GRN_TEXT_LEN(buffer),
                           status);
  tokenizer->is_first_token = GRN_FALSE;

  return NULL;
}
Example #29
0
static void
command_schema_table_command_collect_arguments(grn_ctx *ctx,
                                               grn_obj *table,
                                               grn_obj *arguments)
{
#define ADD(name_, value_)                              \
  grn_vector_add_element(ctx, arguments,                \
                         name_, strlen(name_),          \
                         0, GRN_DB_TEXT);               \
  grn_vector_add_element(ctx, arguments,                \
                         value_, strlen(value_),        \
                         0, GRN_DB_TEXT)

#define ADD_OBJECT_NAME(name_, object_) do {                    \
    char object_name[GRN_TABLE_MAX_KEY_SIZE];                   \
    unsigned int object_name_size;                              \
    object_name_size = grn_obj_name(ctx, object_,               \
                                    object_name,                \
                                    GRN_TABLE_MAX_KEY_SIZE);    \
    object_name[object_name_size] = '\0';                       \
    ADD(name_, object_name);                                    \
  } while (GRN_FALSE)

  ADD_OBJECT_NAME("name", table);

  {
    grn_obj flags;
    grn_table_flags table_flags;
    grn_table_flags ignored_flags = GRN_OBJ_KEY_NORMALIZE | GRN_OBJ_PERSISTENT;
    GRN_TEXT_INIT(&flags, 0);
    grn_table_get_info(ctx, table, &table_flags, NULL, NULL, NULL, NULL);
    grn_dump_table_create_flags(ctx,
                                table_flags & ~ignored_flags,
                                &flags);
    GRN_TEXT_PUTC(ctx, &flags, '\0');
    ADD("flags", GRN_TEXT_VALUE(&flags));
    GRN_OBJ_FIN(ctx, &flags);
  }

  {
    grn_obj *key_type = NULL;

    if (table->header.type != GRN_TABLE_NO_KEY &&
        table->header.domain != GRN_ID_NIL) {
      key_type = grn_ctx_at(ctx, table->header.domain);
    }
    if (key_type) {
      ADD_OBJECT_NAME("key_type", key_type);
    }
  }

  {
    grn_obj *value_type = NULL;
    grn_id range = GRN_ID_NIL;

    if (table->header.type != GRN_TABLE_DAT_KEY) {
      range = grn_obj_get_range(ctx, table);
    }
    if (range != GRN_ID_NIL) {
      value_type = grn_ctx_at(ctx, range);
    }
    if (value_type) {
      ADD_OBJECT_NAME("value_type", value_type);
    }
  }

  {
    grn_obj *tokenizer;
    tokenizer = grn_obj_get_info(ctx, table, GRN_INFO_DEFAULT_TOKENIZER, NULL);
    if (tokenizer) {
      ADD_OBJECT_NAME("default_tokenizer", tokenizer);
    }
  }

  {
    grn_obj *normalizer;
    normalizer = grn_obj_get_info(ctx, table, GRN_INFO_NORMALIZER, NULL);
    if (!normalizer && (table->header.flags & GRN_OBJ_KEY_NORMALIZE)) {
      normalizer = grn_ctx_get(ctx, "NormalizerAuto", -1);
    }
    if (normalizer) {
      ADD_OBJECT_NAME("normalizer", normalizer);
    }
  }

  if (table->header.type != GRN_TABLE_NO_KEY) {
    grn_obj token_filters;
    int n;

    GRN_PTR_INIT(&token_filters, GRN_OBJ_VECTOR, GRN_DB_OBJECT);
    grn_obj_get_info(ctx, table, GRN_INFO_TOKEN_FILTERS, &token_filters);
    n = GRN_BULK_VSIZE(&token_filters) / sizeof(grn_obj *);
    if (n > 0) {
      grn_obj token_filter_names;
      int i;

      GRN_TEXT_INIT(&token_filter_names, 0);
      for (i = 0; i < n; i++) {
        grn_obj *token_filter;
        char name[GRN_TABLE_MAX_KEY_SIZE];
        int name_size;

        token_filter = GRN_PTR_VALUE_AT(&token_filters, i);
        name_size = grn_obj_name(ctx, token_filter,
                                 name, GRN_TABLE_MAX_KEY_SIZE);
        if (i > 0) {
          GRN_TEXT_PUTC(ctx, &token_filter_names, ',');
        }
        GRN_TEXT_PUT(ctx, &token_filter_names, name, name_size);
      }
      GRN_TEXT_PUTC(ctx, &token_filter_names, '\0');
      ADD("token_filters", GRN_TEXT_VALUE(&token_filter_names));
      GRN_OBJ_FIN(ctx, &token_filter_names);
    }
    GRN_OBJ_FIN(ctx, &token_filters);
  }

#undef ADD_OBJECT_NAME
#undef ADD
}