Ejemplo n.º 1
0
/*
  Plugin command that echoes its "input" variable back to the client.

  The command logs the received text and then emits a two element array
  named "RESULT": the input text followed by its length in bytes.
  A missing or empty "input" is reported as an empty string with length 0.

  `nargs' and `args' are unused; the argument is looked up by name through
  `user_data'. The return value is always NULL.
 */
static grn_obj *
command_echo(grn_ctx *ctx, GNUC_UNUSED int nargs, GNUC_UNUSED grn_obj **args,
             grn_user_data *user_data)
{
  grn_obj *var;

  /* Default to an empty string so that the output call never gets NULL. */
  const char *input = "";
  unsigned int input_len = 0;

  var = grn_plugin_proc_get_var(ctx, user_data, "input", -1);

  if (var && GRN_TEXT_LEN(var) != 0) {
    input = GRN_TEXT_VALUE(var);
    input_len = (unsigned int)GRN_TEXT_LEN(var);
  }

  GRN_PLUGIN_LOG(ctx, GRN_LOG_NOTICE, "[echo] input = %.*s",
                 (int)input_len, input);

  grn_ctx_output_array_open(ctx, "RESULT", 2);
  /* The text is handled as pointer + length everywhere else in this
     function, so use the length-aware output call instead of relying on a
     NUL terminator being present after the value. */
  grn_ctx_output_str(ctx, input, input_len);
  grn_ctx_output_int64(ctx, input_len);
  grn_ctx_output_array_close(ctx);

  return NULL;
}
Ejemplo n.º 2
0
/*
  Parses one line of a tab separated synonyms file and registers it in the
  file-level `synonyms' hash.

  Line format: KEY<TAB>SYNONYM1[<TAB>SYNONYM2...]

  The bytes before the first tab are appended to `key'. The remaining fields
  are appended to `value' as "((SYNONYM1) OR (SYNONYM2) ...)". The value
  stored in the hash is NUL terminated and limited to MAX_SYNONYM_BYTES
  bytes including the terminator; longer values are cut at the byte level.

  Nothing is registered when the line is empty, when is_comment_mark()
  accepts its first byte, or when nothing follows the key.

  `key' and `value' are only appended to here.
  NOTE(review): presumably the caller rewinds both bulks between lines --
  confirm against the caller.
 */
static void
parse_synonyms_file_line(grn_ctx *ctx, const char *line, size_t line_length,
                         grn_obj *key, grn_obj *value)
{
  size_t i = 0;

  /* An empty line has no first byte to inspect; reading line[0] would be
     out of bounds. */
  if (line_length == 0) {
    return;
  }

  if (is_comment_mark(line[i])) {
    return;
  }

  /* Key: everything up to (not including) the first tab. */
  while (i < line_length) {
    char character = line[i];
    i++;
    if (character == '\t') {
      break;
    }
    GRN_TEXT_PUTC(ctx, key, character);
  }

  /* The whole line was consumed by the key: there is no synonym part. */
  if (i == line_length) {
    return;
  }

  /* Value: each further tab starts a new OR'ed alternative. */
  GRN_TEXT_PUTS(ctx, value, "((");
  while (i < line_length) {
    char character = line[i];
    i++;
    if (character == '\t') {
      GRN_TEXT_PUTS(ctx, value, ") OR (");
    } else {
      GRN_TEXT_PUTC(ctx, value, character);
    }
  }
  GRN_TEXT_PUTS(ctx, value, "))");

  {
    grn_id id;
    void *value_location = NULL;

    id = grn_hash_add(ctx, synonyms, GRN_TEXT_VALUE(key), GRN_TEXT_LEN(key),
                      &value_location, NULL);
    if (id == GRN_ID_NIL) {
      GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING,
                     "[plugin][query-expander][tsv] "
                     "failed to register key: <%.*s>",
                     (int)GRN_TEXT_LEN(key), GRN_TEXT_VALUE(key));
      return;
    }

    /* Keep room for the terminator so that at most MAX_SYNONYM_BYTES bytes
       are copied into the hash value. */
    if (GRN_TEXT_LEN(value) > MAX_SYNONYM_BYTES - 1) {
      grn_bulk_truncate(ctx, value, MAX_SYNONYM_BYTES - 1);
    }
    GRN_TEXT_PUTC(ctx, value, '\0');
    grn_memcpy(value_location, GRN_TEXT_VALUE(value), GRN_TEXT_LEN(value));
  }
}
Ejemplo n.º 3
0
/*
  Hook procedure that replaces tag record ids by their synonym ids.

  Four values are popped from `ctx' in this order: flags, new value,
  old value and id. Nothing is done when the new value is empty or when
  flags is 0.

  The table is looked up from the domain of the old value and must pass
  is_table(). For every tag id in the new value, the SYNONYM_COLUMN_NAME
  column of that table is read; when it holds a non-zero id, that id is
  used instead of the original one.

  The new value is handled in one of three ways:
    - its domain passes is_string(): the text is tokenized against the table
      and the new value is rebuilt as a record vector,
    - it is a GRN_UVECTOR: it is rebuilt element by element,
    - otherwise: it is treated as a single record id and rewritten in place
      only when a synonym exists.

  `nargs', `args' and `user_data' are unused. The return value is always
  NULL.
 */
static grn_obj *
command_tag_synonym(grn_ctx *ctx, GNUC_UNUSED int nargs, GNUC_UNUSED grn_obj **args,
                    GNUC_UNUSED grn_user_data *user_data)
{
  grn_obj *flags = grn_ctx_pop(ctx);
  grn_obj *newvalue = grn_ctx_pop(ctx);
  grn_obj *oldvalue = grn_ctx_pop(ctx);
  GNUC_UNUSED grn_obj *id = grn_ctx_pop(ctx);
  grn_obj record;
  grn_obj *domain;
  grn_obj *table;
  grn_obj *column;
  int from_text;
  int from_uvector;
  int i, n;

  if (GRN_BULK_VSIZE(newvalue) == 0 || GRN_INT32_VALUE(flags) == 0) {
    return NULL;
  }

  /* A domain that doesn't resolve to an object is rejected as well, so that
     no NULL table is handed to grn_obj_column() below. */
  table = grn_ctx_at(ctx, oldvalue->header.domain);
  if (!table || !is_table(table)) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING,
                   "[tag-synonym] "
                   "hooked column must be reference type");
    return NULL;
  }

  column = grn_obj_column(ctx,
                          table,
                          SYNONYM_COLUMN_NAME,
                          SYNONYM_COLUMN_NAME_LEN);
  if (!column) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING,
                   "[tag-synonym] "
                   "couldn't open synonym column");
    return NULL;
  }

  /* Decide the input form once; is_string() is never called with NULL. */
  domain = grn_ctx_at(ctx, newvalue->header.domain);
  from_text = (domain && is_string(domain));
  from_uvector = (!from_text && newvalue->header.type == GRN_UVECTOR);

  if (from_text || from_uvector) {
    grn_obj value;

    if (from_text) {
      GRN_RECORD_INIT(&record, GRN_OBJ_VECTOR, oldvalue->header.domain);
      grn_table_tokenize(ctx, table,
                         GRN_TEXT_VALUE(newvalue), GRN_TEXT_LEN(newvalue),
                         &record, GRN_TRUE);
    } else {
      /* Shallow copy: `record' keeps the original ids readable while
         `newvalue' is rebuilt below. */
      record = *newvalue;
    }

    /* NOTE(review): re-initializing `newvalue' drops its reference to the
       previous buffer without releasing it; whether that buffer is owned
       here can't be told from this function -- confirm. */
    GRN_RECORD_INIT(newvalue, GRN_OBJ_VECTOR, oldvalue->header.domain);
    GRN_UINT32_INIT(&value, 0);
    n = grn_vector_size(ctx, &record);
    for (i = 0; i < n; i++) {
      grn_id tid;
      tid = grn_uvector_get_element(ctx, &record, i, NULL);
      GRN_BULK_REWIND(&value);
      grn_obj_get_value(ctx, column, tid, &value);
      if (GRN_UINT32_VALUE(&value)) {
        GRN_PLUGIN_LOG(ctx, GRN_LOG_INFO,
                       "[tag-synonym] "
                       "changed: tid %u -> %u", tid, GRN_UINT32_VALUE(&value));
        tid = GRN_UINT32_VALUE(&value);
      }
      grn_uvector_add_element(ctx, newvalue, tid, 0);
    }
    grn_obj_unlink(ctx, &value);
    if (from_text) {
      /* The tokenized vector was created in this function; release it. */
      grn_obj_unlink(ctx, &record);
    }
  } else {
    grn_id tid;
    grn_obj value;
    tid = GRN_RECORD_VALUE(newvalue);
    GRN_UINT32_INIT(&value, 0);
    grn_obj_get_value(ctx, column, tid, &value);
    if (GRN_UINT32_VALUE(&value)) {
      GRN_PLUGIN_LOG(ctx, GRN_LOG_INFO,
                     "[tag-synonym] "
                     "changed: tid %u -> %u", tid, GRN_UINT32_VALUE(&value));
      tid = GRN_UINT32_VALUE(&value);
      GRN_BULK_REWIND(newvalue);
      GRN_RECORD_SET(ctx, newvalue, tid);
    }
    grn_obj_unlink(ctx, &value);
  }

  return NULL;
}
Ejemplo n.º 4
0
/*
  Plugin command that installs the "tag_synonym" procedure as a
  GRN_HOOK_SET hook on the column given by the "table" and "column"
  variables.

  Before the hook is added, the range of the target column must resolve to
  an object that has a SYNONYM_COLUMN_NAME column. On success the number of
  GRN_HOOK_SET hooks on the target column is written to the output.

  On any failure an error is logged, nothing is written to the output and
  no hook is added. `nargs' and `args' are unused. The return value is
  always NULL.
 */
static grn_obj *
command_tag_synonym_add(grn_ctx *ctx, GNUC_UNUSED int nargs, GNUC_UNUSED grn_obj **args,
                        grn_user_data *user_data)
{
  grn_obj *var, *proc, *table, *column;
  unsigned int nhooks = 0;
  char *table_name = NULL;
  unsigned int table_len = 0;
  char *column_name = NULL;
  unsigned int column_len = 0;

  var = grn_plugin_proc_get_var(ctx, user_data, "table", -1);
  if (var && GRN_TEXT_LEN(var) != 0) {
    table_name = GRN_TEXT_VALUE(var);
    table_len = GRN_TEXT_LEN(var);
  }
  var = grn_plugin_proc_get_var(ctx, user_data, "column", -1);
  if (var && GRN_TEXT_LEN(var) != 0) {
    column_name = GRN_TEXT_VALUE(var);
    column_len = GRN_TEXT_LEN(var);
  }

  if (!table_name || !column_name) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                   "[tag-synonym] "
                   "table and column must be specified");
    return NULL;
  }

  table = grn_ctx_get(ctx, table_name, table_len);
  if (!table) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                   "[tag-synonym] "
                   "table doesn't exist: <%.*s>",
                   (int)table_len, table_name);
    return NULL;
  }

  column = grn_obj_column(ctx, table, column_name, column_len);
  if (!column) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                   "[tag-synonym] "
                   "column doesn't exist: <%.*s.%.*s>",
                   (int)table_len, table_name,
                   (int)column_len, column_name);
    return NULL;
  }

  {
    grn_obj *range;
    grn_obj *col;
    grn_id range_id;

    range_id = grn_obj_get_range(ctx, column);
    range = grn_ctx_at(ctx, range_id);

    if (!range) {
      GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                     "[tag-synonym] "
                     "hooked column must be reference type");
      return NULL;
    }

    col = grn_obj_column(ctx,
                         range,
                         SYNONYM_COLUMN_NAME,
                         SYNONYM_COLUMN_NAME_LEN);
    if (!col) {
      GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                     "[tag-synonym] "
                     "couldn't open synonym column");
      return NULL;
    }
    /* The synonym column is only opened to verify that it exists. */
    grn_obj_unlink(ctx, col);
  }

  proc = grn_ctx_get(ctx, "tag_synonym", -1);
  if (!proc) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                   "[tag-synonym] "
                   "hook procedure isn't registered: <tag_synonym>");
    return NULL;
  }

  {
    grn_obj data;
    default_set_value_hook_data hook_data = { grn_obj_id(ctx, proc), 0 };
    /* `data' only references `hook_data'; it doesn't own a copy of it. */
    GRN_TEXT_INIT(&data, GRN_OBJ_DO_SHALLOW_COPY);
    GRN_TEXT_SET_REF(&data, &hook_data, sizeof(hook_data));
    grn_obj_add_hook(ctx, column, GRN_HOOK_SET, 0, proc, &data);
    grn_obj_unlink(ctx, &data);
  }

  nhooks = grn_obj_get_nhooks(ctx, column, GRN_HOOK_SET);
  grn_ctx_output_int32(ctx, nhooks);

  return NULL;
}
Ejemplo n.º 5
0
/*
  This function is called for a full text search query or a document to be
  indexed. This means that both short/long strings are given.
  The return value of this function is ignored. When an error occurs in this
  function, `ctx->rc' is overwritten with an error code (not GRN_SUCCESS).

  The shared `sole_mecab' instance is created on first use. The normalized
  query is then either kept as is (when it already contains tokenized
  delimiters) or analyzed by MeCab, whose output is copied into a buffer
  owned by the tokenizer.

  On success the newly allocated tokenizer state is stored in
  `user_data->ptr'. On every failure path the query opened here is closed
  again and NULL is returned without touching `user_data->ptr'.
 */
static grn_obj *
mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  char *buf = NULL, *p;
  const char *s;
  grn_mecab_tokenizer *tokenizer;
  unsigned int bufsize = 0;
  grn_tokenizer_query *query;
  grn_obj *normalized_query;
  const char *normalized_string;
  unsigned int normalized_string_length;

  query = grn_tokenizer_query_open(ctx, nargs, args);
  if (!query) {
    return NULL;
  }
  if (!sole_mecab) {
    grn_plugin_mutex_lock(ctx, sole_mecab_mutex);
    /* Checked again under the mutex: it may have been created while this
       call was waiting for the lock. */
    if (!sole_mecab) {
      sole_mecab = mecab_new2("-Owakati");
      if (!sole_mecab) {
        GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
                         "[tokenizer][mecab] "
                         "mecab_new2() failed on mecab_init(): %s",
                         mecab_strerror(NULL));
      } else {
        sole_mecab_encoding = get_mecab_encoding(sole_mecab);
      }
    }
    grn_plugin_mutex_unlock(ctx, sole_mecab_mutex);
  }
  if (!sole_mecab) {
    grn_tokenizer_query_close(ctx, query);
    return NULL;
  }

  if (query->encoding != sole_mecab_encoding) {
    /* Report first: the message reads query->encoding, so the query must
       still be open at this point. */
    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
                     "[tokenizer][mecab] "
                     "MeCab dictionary charset (%s) does not match "
                     "the table encoding: <%s>",
                     grn_enctostr(sole_mecab_encoding),
                     grn_enctostr(query->encoding));
    grn_tokenizer_query_close(ctx, query);
    return NULL;
  }

  if (!(tokenizer = GRN_PLUGIN_MALLOC(ctx, sizeof(grn_mecab_tokenizer)))) {
    grn_tokenizer_query_close(ctx, query);
    GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                     "[tokenizer][mecab] "
                     "memory allocation to grn_mecab_tokenizer failed");
    return NULL;
  }
  tokenizer->mecab = sole_mecab;
  tokenizer->query = query;

  normalized_query = query->normalized_query;
  grn_string_get_normalized(ctx,
                            normalized_query,
                            &normalized_string,
                            &normalized_string_length,
                            NULL);
  tokenizer->have_tokenized_delimiter =
    grn_tokenizer_have_tokenized_delimiter(ctx,
                                           normalized_string,
                                           normalized_string_length,
                                           query->encoding);

  if (tokenizer->have_tokenized_delimiter) {
    /* The input is used directly; no MeCab run and no private buffer. */
    tokenizer->buf = NULL;
    tokenizer->next = normalized_string;
    tokenizer->end = tokenizer->next + normalized_string_length;
  } else {
    /* The MeCab result is copied into `buf' before the mutex is released,
       so `s' is not used after unlocking. */
    grn_plugin_mutex_lock(ctx, sole_mecab_mutex);
    s = mecab_sparse_tostr2(tokenizer->mecab,
                            normalized_string,
                            normalized_string_length);
    if (!s) {
      GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
                       "[tokenizer][mecab] "
                       "mecab_sparse_tostr() failed len=%u err=%s",
                       normalized_string_length,
                       mecab_strerror(tokenizer->mecab));
    } else {
      bufsize = strlen(s) + 1;
      if (!(buf = GRN_PLUGIN_MALLOC(ctx, bufsize))) {
        GRN_PLUGIN_LOG(ctx, GRN_LOG_ALERT,
                       "[tokenizer][mecab] "
                       "buffer allocation on mecab_init failed !");
      } else {
        memcpy(buf, s, bufsize);
      }
    }
    grn_plugin_mutex_unlock(ctx, sole_mecab_mutex);
    if (!s || !buf) {
      grn_tokenizer_query_close(ctx, tokenizer->query);
      GRN_PLUGIN_FREE(ctx, tokenizer);
      return NULL;
    }
    /* A certain version of mecab returns trailing lf or spaces. */
    for (p = buf + bufsize - 2;
         buf <= p && isspace(*(unsigned char *)p);
         p--) { *p = '\0'; }
    tokenizer->buf = buf;
    tokenizer->next = buf;
    tokenizer->end = p + 1;
  }
  user_data->ptr = tokenizer;

  grn_tokenizer_token_init(ctx, &(tokenizer->token));

  return NULL;
}