예제 #1
0
파일: token.c 프로젝트: ryoqun/groonga
/*
 * Builds the per-run state of a delimiter based tokenizer.
 *
 * The text to tokenize is popped from the ctx stack and opened through
 * grn_str_open_() with the table's encoding; GRN_OBJ_KEY_NORMALIZE is
 * forwarded from the table flags. `delimiter` is stored by pointer, not
 * copied.
 *
 * The state is stored in user_data->ptr only once it is fully initialized.
 * When initialization fails the state is freed and user_data->ptr is left
 * untouched, so the caller never ends up holding a pointer to freed memory.
 *
 * Always returns NULL. A missing argument or a grn_str_open_() failure is
 * reported through ERR(); an allocation failure returns without an extra
 * message from this function.
 */
static grn_obj *
delimited_init(grn_ctx *ctx, grn_obj *table, grn_user_data *user_data,
               uint8_t *delimiter, uint32_t delimiter_len)
{
  grn_obj *str;
  int nflags = 0;
  grn_delimited_tokenizer *token;
  grn_obj_flags table_flags;
  if (!(str = grn_ctx_pop(ctx))) {
    ERR(GRN_INVALID_ARGUMENT, "missing argument");
    return NULL;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_delimited_tokenizer)))) { return NULL; }
  token->delimiter = delimiter;
  token->delimiter_len = delimiter_len;
  token->pos = 0;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_FREE(token);
    ERR(GRN_TOKENIZER_ERROR, "grn_str_open failed at grn_token_open");
    return NULL;
  }
  token->next = (unsigned char *)token->nstr->norm;
  token->end = token->next + token->nstr->norm_blen;
  token->len = token->nstr->length;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  /* Publish last: every failure path above has already returned. */
  user_data->ptr = token;
  return NULL;
}
예제 #2
0
파일: token.c 프로젝트: ikdttr/groonga
/*
 * Builds the per-run state of an N-gram tokenizer.
 *
 * The text to tokenize is popped from the ctx stack and opened through
 * grn_str_open_() with GRN_STR_REMOVEBLANK | GRN_STR_WITH_CTYPES, plus
 * GRN_OBJ_KEY_NORMALIZE when the table has that flag.
 *
 * The state is stored in user_data->ptr only once it is fully initialized.
 * If grn_str_open_() fails the state is freed here instead of being left
 * allocated behind a half-initialized pointer.
 *
 * Returns GRN_SUCCESS on success, GRN_INVALID_ARGUMENT when no text was
 * pushed, ctx->rc when the state cannot be allocated, and
 * GRN_TOKENIZER_ERROR when grn_str_open_() fails.
 */
static grn_rc
ngram_init(grn_ctx *ctx, grn_obj *table, grn_proc_data *user_data, uint8_t ngram_unit)
{
  grn_obj *str;
  int nflags = GRN_STR_REMOVEBLANK|GRN_STR_WITH_CTYPES;
  grn_ngram_tokenizer *token;
  grn_obj_flags table_flags;
  if (!(str = grn_ctx_pop(ctx))) { return GRN_INVALID_ARGUMENT; }
  if (!(token = GRN_MALLOC(sizeof(grn_ngram_tokenizer)))) { return ctx->rc; }
  token->uni_alpha = 1;
  token->uni_digit = 1;
  token->uni_symbol = 1;
  token->ngram_unit = ngram_unit;
  token->overlap = 0;
  token->pos = 0;
  token->skip = 0;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_str_open failed at grn_token_open");
    /* Nothing else owns the state yet, so release it before bailing out. */
    GRN_FREE(token);
    return GRN_TOKENIZER_ERROR;
  }
  token->next = (unsigned char *)token->nstr->norm;
  token->end = token->next + token->nstr->norm_blen;
  token->ctypes = token->nstr->ctypes;
  token->len = token->nstr->length;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  /* Publish last: every failure path above has already returned. */
  user_data->ptr = token;
  return GRN_SUCCESS;
}
예제 #3
0
파일: tokenizer.c 프로젝트: groonga/groonga
/*
 * Attaches `lexicon` to `query`.
 *
 * When the lexicon actually changes, the query encoding is refreshed (read
 * from the lexicon through grn_table_get_info(), or taken from ctx when the
 * lexicon is NULL) and the query is flagged as needing normalization.
 * Setting the same lexicon again changes nothing.
 *
 * Returns ctx->rc.
 */
grn_rc
grn_tokenizer_query_set_lexicon(grn_ctx *ctx,
                                grn_tokenizer_query *query,
                                grn_obj *lexicon)
{
  GRN_API_ENTER;

  /* Same lexicon as before: nothing to refresh. */
  if (query->lexicon == lexicon) {
    GRN_API_RETURN(ctx->rc);
  }

  query->lexicon = lexicon;
  if (!lexicon) {
    query->encoding = ctx->encoding;
  } else {
    grn_table_get_info(ctx, lexicon,
                       NULL, &(query->encoding), NULL, NULL, NULL);
  }
  query->need_normalize = GRN_TRUE;

  GRN_API_RETURN(ctx->rc);
}
예제 #4
0
파일: token.c 프로젝트: mooz/groonga
/*
 * Allocates a token cursor over `str` for `table`.
 *
 * The table's encoding and tokenizer are looked up first. When the table
 * has a tokenizer, `str` is pushed on the ctx stack (as a shallow reference)
 * and the tokenizer's PROC_INIT function is invoked with the table as its
 * only argument.
 *
 * Returns the new cursor, or NULL when the table information cannot be
 * read, the allocation fails, or ctx->rc is set once initialization is done.
 */
grn_token *
grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len,
               grn_token_mode mode)
{
  grn_obj *tokenizer;
  grn_encoding encoding;
  grn_token *token;

  if (grn_table_get_info(ctx, table, NULL, &encoding, &tokenizer)) {
    return NULL;
  }
  token = GRN_MALLOC(sizeof(grn_token));
  if (!token) {
    return NULL;
  }

  /* Where the tokens come from. */
  token->table = table;
  token->tokenizer = tokenizer;
  token->encoding = encoding;
  token->mode = mode;
  token->orig = str;
  token->orig_blen = str_len;

  /* Cursor position: nothing has been produced yet. */
  token->curr = NULL;
  token->curr_size = 0;
  token->pos = -1;
  token->status = grn_token_doing;
  token->force_prefix = 0;

  if (tokenizer) {
    grn_proc *proc = (grn_proc *)tokenizer;
    grn_obj text;

    GRN_TEXT_INIT(&text, GRN_OBJ_DO_SHALLOW_COPY);
    GRN_TEXT_SET_REF(&text, str, str_len);

    token->pctx.proc = proc;
    token->pctx.phase = PROC_INIT;
    token->pctx.caller = NULL;
    token->pctx.hooks = NULL;
    token->pctx.currh = NULL;
    token->pctx.user_data.ptr = NULL;

    /* The init function picks the text up from the ctx stack. */
    grn_ctx_push(ctx, &text);
    proc->funcs[PROC_INIT](ctx, 1, &table, &token->pctx.user_data);
    grn_obj_close(ctx, &text);
  }

  if (ctx->rc) {
    GRN_FREE(token);
    return NULL;
  }
  return token;
}
예제 #5
0
파일: token.c 프로젝트: ryoqun/groonga
/*
 * Builds the per-run state of an N-gram tokenizer.
 *
 * The text to tokenize is popped from the ctx stack and opened through
 * grn_str_open_() with GRN_STR_REMOVEBLANK | GRN_STR_WITH_CTYPES, plus
 * GRN_OBJ_KEY_NORMALIZE when the table has that flag. The remaining
 * parameters are copied verbatim into the state.
 *
 * The state is stored in user_data->ptr only once it is fully initialized.
 * When initialization fails the state is freed and user_data->ptr is left
 * untouched, so the caller never ends up holding a pointer to freed memory.
 *
 * Always returns NULL. A missing argument or a grn_str_open_() failure is
 * reported through ERR(); an allocation failure returns without an extra
 * message from this function.
 */
static grn_obj *
ngram_init(grn_ctx *ctx, grn_obj *table, grn_user_data *user_data, uint8_t ngram_unit,
           uint8_t uni_alpha, uint8_t uni_digit, uint8_t uni_symbol, uint8_t ignore_blank)
{
  grn_obj *str;
  int nflags = GRN_STR_REMOVEBLANK|GRN_STR_WITH_CTYPES;
  grn_ngram_tokenizer *token;
  grn_obj_flags table_flags;
  if (!(str = grn_ctx_pop(ctx))) {
    ERR(GRN_INVALID_ARGUMENT, "missing argument");
    return NULL;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_ngram_tokenizer)))) { return NULL; }
  token->uni_alpha = uni_alpha;
  token->uni_digit = uni_digit;
  token->uni_symbol = uni_symbol;
  token->ngram_unit = ngram_unit;
  token->ignore_blank = ignore_blank;
  token->overlap = 0;
  token->pos = 0;
  token->skip = 0;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_FREE(token);
    ERR(GRN_TOKENIZER_ERROR, "grn_str_open failed at grn_token_open");
    return NULL;
  }
  token->next = (unsigned char *)token->nstr->norm;
  token->end = token->next + token->nstr->norm_blen;
  token->ctypes = token->nstr->ctypes;
  token->len = token->nstr->length;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  /* Publish last: every failure path above has already returned. */
  user_data->ptr = token;
  return NULL;
}
예제 #6
0
파일: token.c 프로젝트: ikdttr/groonga
/*
 * Builds the per-run state of the MeCab tokenizer.
 *
 * The text to tokenize is popped from the ctx stack, opened through
 * grn_str_open_() and handed to mecab_sparse_tostr3(). The output buffer
 * starts at twice the input length plus one and is doubled, up to 10
 * attempts, as long as MeCab reports "output buffer overflow". Trailing
 * whitespace in the MeCab output is stripped.
 *
 * The state is stored in user_data->ptr only once it is fully initialized;
 * on every failure path the state is freed and user_data->ptr is left
 * untouched.
 *
 * Returns GRN_SUCCESS on success, GRN_INVALID_ARGUMENT when no text was
 * pushed, ctx->rc on allocation failure and GRN_TOKENIZER_ERROR otherwise.
 */
static grn_rc
mecab_init(grn_ctx *ctx, grn_obj *table, grn_proc_data *user_data)
{
  grn_obj *str;
  int nflags = 0;
  char *buf, *s;
  char mecab_err[256];
  grn_obj_flags table_flags;
  grn_mecab_tokenizer *token;
  unsigned int bufsize, maxtrial = 10, len;
  size_t buflen;
  if (!(str = grn_ctx_pop(ctx))) { return GRN_INVALID_ARGUMENT; }
  SOLE_MECAB_CONFIRM;
  if (!sole_mecab) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "mecab_new failed on grn_mecab_init");
    return GRN_TOKENIZER_ERROR;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_mecab_tokenizer)))) { return ctx->rc; }
  token->mecab = sole_mecab;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_str_open failed at grn_token_open");
    /* Nothing else owns the state yet, so release it before bailing out. */
    GRN_FREE(token);
    return GRN_TOKENIZER_ERROR;
  }
  len = token->nstr->norm_blen;
  /* strncpy() below copies at most sizeof - 1 bytes; keep the terminator. */
  mecab_err[sizeof(mecab_err) - 1] = '\0';
  for (bufsize = len * 2 + 1; maxtrial; bufsize *= 2, maxtrial--) {
    if(!(buf = GRN_MALLOC(bufsize + 1))) {
      GRN_LOG(ctx, GRN_LOG_ALERT, "buffer allocation on mecab_init failed !");
      /*
       * NOTE(review): token->nstr is still open here and becomes unreachable
       * once token is freed. The matching close routine is not visible in
       * this snippet -- release nstr before this GRN_FREE().
       */
      GRN_FREE(token);
      return ctx->rc;
    }
    MUTEX_LOCK(sole_mecab_lock);
    s = mecab_sparse_tostr3(token->mecab, token->nstr->norm, len, buf, bufsize);
    if (!s) {
      /* Copy the message while the lock is still held. */
      strncpy(mecab_err, mecab_strerror(token->mecab), sizeof(mecab_err) - 1);
    }
    MUTEX_UNLOCK(sole_mecab_lock);
    if (s) { break; }
    GRN_FREE(buf);
    /* Only an overflow is worth retrying with a larger buffer. */
    if (strstr(mecab_err, "output buffer overflow") == NULL) { break; }
  }
  if (!s) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "mecab_sparse_tostr failed len=%d bufsize=%d err=%s",
            len, bufsize, mecab_err);
    /* NOTE(review): token->nstr is not released here either (see above). */
    GRN_FREE(token);
    return GRN_TOKENIZER_ERROR;
  }
  /*
   * Certain versions of mecab return a trailing LF or spaces. Trim by
   * length so that no pointer before `buf` is formed when the output is
   * empty or consists of whitespace only.
   */
  buflen = strlen(buf);
  while (buflen > 0 && isspace((unsigned char)buf[buflen - 1])) {
    buf[--buflen] = '\0';
  }
  token->buf = (unsigned char *)buf;
  token->next = (unsigned char *)buf;
  token->end = (unsigned char *)buf + buflen;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  /* Publish last: every failure path above has already returned. */
  user_data->ptr = token;
  return GRN_SUCCESS;
}
예제 #7
0
/*
 * Creates a tokenizer query from the values on the ctx stack.
 *
 * Three objects are popped, in this order: the flags, the query text and
 * the tokenize mode. Flags default to 0 and the mode to GRN_TOKENIZE_ADD
 * when the corresponding object is missing. args[0] is used as the table
 * that supplies the flags, encoding and normalizer.
 *
 * The query keeps a NUL-terminated private copy of the text as well as a
 * string object opened with grn_string_open_(); NormalizerAuto is used when
 * the table has GRN_OBJ_KEY_NORMALIZE set.
 *
 * Returns the new query, or NULL after reporting through GRN_PLUGIN_ERROR()
 * (allocation failure of the query itself returns NULL without a message
 * from this function).
 */
grn_tokenizer_query *
grn_tokenizer_query_open(grn_ctx *ctx, int num_args, grn_obj **args,
                         unsigned int normalize_flags)
{
  grn_obj *flags = grn_ctx_pop(ctx);
  grn_obj *query_str = grn_ctx_pop(ctx);
  grn_obj *tokenize_mode = grn_ctx_pop(ctx);
  grn_tokenizer_query *query;
  grn_obj *table;
  grn_obj *normalizer = NULL;
  grn_obj *normalized_query;
  grn_obj_flags table_flags;
  grn_encoding table_encoding;
  unsigned int query_length;
  char *query_buf;

  if (!query_str) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "missing argument");
    return NULL;
  }
  if (num_args < 1 || !args || !args[0]) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "invalid NULL pointer");
    return NULL;
  }

  query = GRN_PLUGIN_MALLOC(ctx, sizeof(grn_tokenizer_query));
  if (!query) {
    return NULL;
  }
  query->normalized_query = NULL;
  query->query_buf = NULL;
  query->flags = flags ? GRN_UINT32_VALUE(flags) : 0;
  if (!tokenize_mode) {
    query->tokenize_mode = GRN_TOKENIZE_ADD;
  } else {
    query->tokenize_mode = GRN_UINT32_VALUE(tokenize_mode);
  }
  query->token_mode = query->tokenize_mode;

  /* Private, NUL-terminated copy of the raw query text. */
  table = args[0];
  query_length = GRN_TEXT_LEN(query_str);
  query_buf = (char *)GRN_PLUGIN_MALLOC(ctx, query_length + 1);
  if (!query_buf) {
    GRN_PLUGIN_FREE(ctx, query);
    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
                     "[tokenizer] failed to duplicate query");
    return NULL;
  }

  grn_table_get_info(ctx, table, &table_flags, &table_encoding, NULL,
                     &normalizer, NULL);
  /* The legacy table flag overrides whatever normalizer the table reports. */
  if (table_flags & GRN_OBJ_KEY_NORMALIZE) {
    normalizer = GRN_NORMALIZER_AUTO;
  }
  normalized_query = grn_string_open_(ctx,
                                      GRN_TEXT_VALUE(query_str),
                                      GRN_TEXT_LEN(query_str),
                                      normalizer,
                                      normalize_flags,
                                      table_encoding);
  if (!normalized_query) {
    GRN_PLUGIN_FREE(ctx, query_buf);
    GRN_PLUGIN_FREE(ctx, query);
    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
                     "[tokenizer] failed to open normalized string");
    return NULL;
  }

  query->normalized_query = normalized_query;
  grn_memcpy(query_buf, GRN_TEXT_VALUE(query_str), query_length);
  query_buf[query_length] = '\0';
  query->query_buf = query_buf;
  query->ptr = query_buf;
  query->length = query_length;
  query->encoding = table_encoding;

  if (!(query->flags & GRN_TOKEN_CURSOR_ENABLE_TOKENIZED_DELIMITER)) {
    query->have_tokenized_delimiter = GRN_FALSE;
  } else {
    const char *normalized_string;
    unsigned int normalized_string_length;

    grn_string_get_normalized(ctx,
                              query->normalized_query,
                              &normalized_string,
                              &normalized_string_length,
                              NULL);
    query->have_tokenized_delimiter =
      grn_tokenizer_have_tokenized_delimiter(ctx,
                                             normalized_string,
                                             normalized_string_length,
                                             query->encoding);
  }

  return query;
}
예제 #8
0
파일: operator.c 프로젝트: groonga/groonga
/*
 * Tests whether the key of any record referenced by `uvector` matches the
 * regular expression in `pattern`.
 *
 * Keys are matched as stored when the domain table reports a normalizer.
 * Otherwise each key is first passed through grn_string_open() with the
 * object named GRN_NORMALIZER_AUTO_NAME and the normalized form is matched.
 * Records whose key is empty, or whose key cannot be normalized, are
 * skipped.
 *
 * Returns GRN_TRUE on the first match and GRN_FALSE otherwise, including
 * when the uvector is empty, the pattern cannot be compiled, the domain
 * cannot be resolved, or regexp support is compiled out.
 */
static grn_bool
exec_regexp_uvector_bulk(grn_ctx *ctx, grn_obj *uvector, grn_obj *pattern)
{
#ifdef GRN_SUPPORT_REGEXP
  grn_bool matched = GRN_FALSE;
  unsigned int i, size;
  OnigRegex regex;
  grn_obj *domain;
  /*
   * Pre-set so the branch below is well defined even if
   * grn_table_get_info() returns without writing to it.
   */
  grn_obj *normalizer = NULL;
  grn_obj *normalizer_auto = NULL;

  size = grn_uvector_size(ctx, uvector);
  if (size == 0) {
    return GRN_FALSE;
  }

  regex = grn_onigmo_new(ctx,
                         GRN_TEXT_VALUE(pattern),
                         GRN_TEXT_LEN(pattern),
                         GRN_ONIGMO_OPTION_DEFAULT,
                         GRN_ONIGMO_SYNTAX_DEFAULT,
                         "[operator]");
  if (!regex) {
    return GRN_FALSE;
  }

  domain = grn_ctx_at(ctx, uvector->header.domain);
  if (!domain) {
    onig_free(regex);
    return GRN_FALSE;
  }

  grn_table_get_info(ctx, domain, NULL, NULL, NULL, &normalizer, NULL);
  if (!normalizer) {
    normalizer_auto = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
  }

  for (i = 0; i < size; i++) {
    grn_id record_id;
    char key[GRN_TABLE_MAX_KEY_SIZE];
    int key_size;

    record_id = grn_uvector_get_element(ctx, uvector, i, NULL);
    key_size = grn_table_get_key(ctx, domain, record_id,
                                 key, GRN_TABLE_MAX_KEY_SIZE);
    if (key_size == 0) {
      continue;
    }

    if (normalizer) {
      matched = regexp_is_match(ctx, regex, key, key_size);
    } else {
      grn_obj *norm_key;
      const char *norm_key_raw;
      unsigned int norm_key_raw_length_in_bytes;

      norm_key = grn_string_open(ctx, key, key_size, normalizer_auto, 0);
      if (!norm_key) {
        /*
         * Without a string object the outputs below would stay
         * uninitialized; treat the key as a non-match instead.
         */
        continue;
      }
      grn_string_get_normalized(ctx, norm_key,
                                &norm_key_raw,
                                &norm_key_raw_length_in_bytes,
                                NULL);
      matched = regexp_is_match(ctx, regex,
                                norm_key_raw,
                                norm_key_raw_length_in_bytes);
      grn_obj_unlink(ctx, norm_key);
    }

    if (matched) {
      break;
    }
  }

  if (normalizer_auto) {
    grn_obj_unlink(ctx, normalizer_auto);
  }

  grn_obj_unlink(ctx, domain);

  onig_free(regex);

  return matched;
#else /* GRN_SUPPORT_REGEXP */
  return GRN_FALSE;
#endif /* GRN_SUPPORT_REGEXP */
}
예제 #9
0
파일: operator.c 프로젝트: groonga/groonga
/*
 * Applies text operator `op` to the key of `record` (a record of `table`)
 * and the text in `query`.
 *
 * Only tables whose key type is GRN_DB_SHORT_TEXT are handled and an empty
 * query never matches. When the table reports a normalizer the query is
 * opened with grn_string_open() against the table before comparing, except
 * for GRN_OP_REGEXP where the query text is used as given. Without a
 * normalizer both sides are compared as raw text.
 *
 * Returns GRN_TRUE on a match; GRN_FALSE otherwise, including when the
 * query cannot be normalized.
 */
static grn_bool
exec_text_operator_record_text(grn_ctx *ctx,
                               grn_operator op,
                               grn_obj *record, grn_obj *table,
                               grn_obj *query)
{
  /*
   * Pre-set so the branch below is well defined even if
   * grn_table_get_info() returns without writing to it.
   */
  grn_obj *normalizer = NULL;
  char record_key[GRN_TABLE_MAX_KEY_SIZE];
  int record_key_len;
  grn_bool matched = GRN_FALSE;

  if (table->header.domain != GRN_DB_SHORT_TEXT) {
    return GRN_FALSE;
  }

  if (GRN_TEXT_LEN(query) == 0) {
    return GRN_FALSE;
  }

  record_key_len = grn_table_get_key(ctx, table, GRN_RECORD_VALUE(record),
                                     record_key, GRN_TABLE_MAX_KEY_SIZE);
  grn_table_get_info(ctx, table, NULL, NULL, NULL, &normalizer, NULL);
  if (normalizer) {
    grn_obj *norm_query;
    const char *norm_query_raw;
    unsigned int norm_query_raw_length_in_bytes;

    if (op == GRN_OP_REGEXP) {
      norm_query = NULL;
      norm_query_raw = GRN_TEXT_VALUE(query);
      norm_query_raw_length_in_bytes = GRN_TEXT_LEN(query);
    } else {
      norm_query = grn_string_open(ctx,
                                   GRN_TEXT_VALUE(query),
                                   GRN_TEXT_LEN(query),
                                   table,
                                   0);
      if (!norm_query) {
        /*
         * Without a string object the outputs below would stay
         * uninitialized; report "no match" instead of comparing garbage.
         */
        return GRN_FALSE;
      }
      grn_string_get_normalized(ctx, norm_query,
                                &norm_query_raw,
                                &norm_query_raw_length_in_bytes,
                                NULL);
    }
    matched = exec_text_operator(ctx,
                                 op,
                                 record_key,
                                 record_key_len,
                                 norm_query_raw,
                                 norm_query_raw_length_in_bytes);
    if (norm_query) {
      grn_obj_close(ctx, norm_query);
    }
  } else {
    matched = exec_text_operator_raw_text_raw_text(ctx,
                                                   op,
                                                   record_key,
                                                   record_key_len,
                                                   GRN_TEXT_VALUE(query),
                                                   GRN_TEXT_LEN(query));
  }

  return matched;
}
예제 #10
0
파일: token.c 프로젝트: henbow/groonga
/*
 * Allocates a token cursor over `str` for `table`.
 *
 * With a tokenizer: `str` and `flags` are pushed on the ctx stack (in that
 * order) and the tokenizer's PROC_INIT function is invoked with the table
 * as its only argument.
 * Without a tokenizer: the text is opened with grn_string_open_() using the
 * table's normalizer, and the cursor is pointed at the normalized bytes.
 *
 * Returns the new cursor, or NULL when the table information cannot be
 * read, the allocation fails, or ctx->rc is set once initialization is done
 * (the cursor is then released through grn_token_close()).
 */
grn_token *
grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len,
               grn_token_mode mode, unsigned int flags)
{
  grn_obj_flags table_flags;
  grn_encoding encoding;
  grn_obj *tokenizer;
  grn_obj *normalizer;
  grn_token *token;

  if (grn_table_get_info(ctx, table, &table_flags, &encoding, &tokenizer,
                         &normalizer)) {
    return NULL;
  }
  token = GRN_MALLOC(sizeof(grn_token));
  if (!token) {
    return NULL;
  }

  /* Where the tokens come from. */
  token->table = table;
  token->tokenizer = tokenizer;
  token->encoding = encoding;
  token->mode = mode;
  token->orig = str;
  token->orig_blen = str_len;

  /* Cursor position: nothing has been produced yet. */
  token->nstr = NULL;
  token->curr = NULL;
  token->curr_size = 0;
  token->pos = -1;
  token->status = GRN_TOKEN_DOING;
  token->force_prefix = 0;

  if (!tokenizer) {
    token->nstr = grn_string_open_(ctx, str, str_len,
                                   normalizer, 0, token->encoding);
    if (!token->nstr) {
      ERR(GRN_TOKENIZER_ERROR, "grn_string_open failed at grn_token_open");
    } else {
      const char *normalized;

      grn_string_get_normalized(ctx, token->nstr,
                                &normalized, &(token->curr_size), NULL);
      token->curr = (const unsigned char *)normalized;
    }
  } else {
    grn_proc *proc = (grn_proc *)tokenizer;
    grn_obj text, mode_flags;

    GRN_TEXT_INIT(&text, GRN_OBJ_DO_SHALLOW_COPY);
    GRN_TEXT_SET_REF(&text, str, str_len);
    GRN_UINT32_INIT(&mode_flags, 0);
    GRN_UINT32_SET(ctx, &mode_flags, flags);

    token->pctx.proc = proc;
    token->pctx.phase = PROC_INIT;
    token->pctx.caller = NULL;
    token->pctx.hooks = NULL;
    token->pctx.currh = NULL;
    token->pctx.user_data.ptr = NULL;

    /* The init function picks both values up from the ctx stack. */
    grn_ctx_push(ctx, &text);
    grn_ctx_push(ctx, &mode_flags);
    proc->funcs[PROC_INIT](ctx, 1, &table, &token->pctx.user_data);
    grn_obj_close(ctx, &mode_flags);
    grn_obj_close(ctx, &text);
  }

  if (ctx->rc) {
    grn_token_close(ctx, token);
    return NULL;
  }
  return token;
}
예제 #11
0
파일: mecab.c 프로젝트: firewood/groonga
/*
  This function is called for a full text search query or a document to be
  indexed. This means that both short/long strings are given.
  The return value of this function is ignored. When an error occurs in this
  function, `ctx->rc' is overwritten with an error code (not GRN_SUCCESS).

  The text is popped from the ctx stack, opened with grn_str_open_() and
  parsed with mecab_sparse_tostr2(); the MeCab output is copied into a
  private buffer with trailing whitespace removed. The shared MeCab instance
  is created on first use and the table encoding must match the encoding of
  its dictionary. The state is stored in user_data->ptr only on success.
 */
static grn_obj *
mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *str;
  int nflags = 0;
  char *buf = NULL;
  const char *s;
  grn_obj *table = args[0];
  grn_obj_flags table_flags;
  grn_encoding table_encoding;
  grn_mecab_tokenizer *token;
  unsigned int bufsize = 0, buflen, len;
  if (!(str = grn_ctx_pop(ctx))) {
    ERR(GRN_INVALID_ARGUMENT, "missing argument");
    return NULL;
  }
  if (!sole_mecab) {
    CRITICAL_SECTION_ENTER(sole_mecab_lock);
    /* Checked again under the lock before creating the instance. */
    if (!sole_mecab) {
      sole_mecab = mecab_new2("-Owakati");
      if (!sole_mecab) {
        ERR(GRN_TOKENIZER_ERROR, "mecab_new2 failed on grn_mecab_init: %s",
            mecab_strerror(NULL));
      } else {
        sole_mecab_encoding = get_mecab_encoding(sole_mecab);
      }
    }
    CRITICAL_SECTION_LEAVE(sole_mecab_lock);
  }
  if (!sole_mecab) {
    return NULL;
  }
  grn_table_get_info(ctx, table, &table_flags, &table_encoding, NULL);
  if (table_encoding != sole_mecab_encoding) {
    /* The value compared is the table's encoding, so say so. */
    ERR(GRN_TOKENIZER_ERROR,
        "MeCab dictionary charset (%s) does not match the table encoding: <%s>",
        grn_enctostr(sole_mecab_encoding), grn_enctostr(table_encoding));
    return NULL;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_mecab_tokenizer)))) { return NULL; }
  token->mecab = sole_mecab;
  token->encoding = table_encoding;
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_FREE(token);
    ERR(GRN_TOKENIZER_ERROR, "grn_str_open failed at grn_token_open");
    return NULL;
  }
  len = token->nstr->norm_blen;
  CRITICAL_SECTION_ENTER(sole_mecab_lock);
  s = mecab_sparse_tostr2(token->mecab, token->nstr->norm, len);
  if (!s) {
    ERR(GRN_TOKENIZER_ERROR, "mecab_sparse_tostr failed len=%d err=%s",
        len, mecab_strerror(token->mecab));
  } else {
    /* Copy the result before the lock on the shared instance is dropped. */
    bufsize = strlen(s) + 1;
    if (!(buf = GRN_MALLOC(bufsize))) {
      GRN_LOG(ctx, GRN_LOG_ALERT, "buffer allocation on mecab_init failed !");
    } else {
      memcpy(buf, s, bufsize);
    }
  }
  CRITICAL_SECTION_LEAVE(sole_mecab_lock);
  if (!s || !buf) {
    grn_str_close(ctx, token->nstr);
    GRN_FREE(token);
    return NULL;
  }
  /*
   * A certain version of mecab returns trailing lf or spaces. Trim by
   * length so that no pointer before `buf` is formed when the output is
   * empty or consists of whitespace only.
   */
  buflen = bufsize - 1;
  while (buflen > 0 && isspace((unsigned char)buf[buflen - 1])) {
    buf[--buflen] = '\0';
  }
  user_data->ptr = token;
  token->buf = buf;
  token->next = buf;
  token->end = buf + buflen;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  return NULL;
}
예제 #12
0
파일: proc_schema.c 프로젝트: XLPE/groonga
/*
 * Appends to `arguments` the name/value pairs that describe `table`.
 *
 * Every pair is added as two GRN_DB_TEXT vector elements, in this order:
 * "name", "flags" (without GRN_OBJ_KEY_NORMALIZE and GRN_OBJ_PERSISTENT),
 * and then, each only when applicable, "key_type", "value_type",
 * "default_tokenizer", "normalizer" and "token_filters" (comma separated
 * names).
 */
static void
command_schema_table_command_collect_arguments(grn_ctx *ctx,
                                               grn_obj *table,
                                               grn_obj *arguments)
{
  /* Wrapped in do/while so the two calls always act as one statement. */
#define ADD(name_, value_) do {                         \
    grn_vector_add_element(ctx, arguments,              \
                           name_, strlen(name_),        \
                           0, GRN_DB_TEXT);             \
    grn_vector_add_element(ctx, arguments,              \
                           value_, strlen(value_),      \
                           0, GRN_DB_TEXT);             \
  } while (GRN_FALSE)

  /*
   * The buffer has one spare byte so the terminator still fits when
   * grn_obj_name() reports a length equal to GRN_TABLE_MAX_KEY_SIZE; the
   * clamp keeps the write in bounds should it ever report more.
   */
#define ADD_OBJECT_NAME(name_, object_) do {                    \
    char object_name[GRN_TABLE_MAX_KEY_SIZE + 1];               \
    unsigned int object_name_size;                              \
    object_name_size = grn_obj_name(ctx, object_,               \
                                    object_name,                \
                                    GRN_TABLE_MAX_KEY_SIZE);    \
    if (object_name_size > GRN_TABLE_MAX_KEY_SIZE) {            \
      object_name_size = GRN_TABLE_MAX_KEY_SIZE;                \
    }                                                           \
    object_name[object_name_size] = '\0';                       \
    ADD(name_, object_name);                                    \
  } while (GRN_FALSE)

  ADD_OBJECT_NAME("name", table);

  {
    grn_obj flags;
    grn_table_flags table_flags;
    grn_table_flags ignored_flags = GRN_OBJ_KEY_NORMALIZE | GRN_OBJ_PERSISTENT;
    GRN_TEXT_INIT(&flags, 0);
    grn_table_get_info(ctx, table, &table_flags, NULL, NULL, NULL, NULL);
    grn_dump_table_create_flags(ctx,
                                table_flags & ~ignored_flags,
                                &flags);
    /* ADD() measures the value with strlen(), so terminate it first. */
    GRN_TEXT_PUTC(ctx, &flags, '\0');
    ADD("flags", GRN_TEXT_VALUE(&flags));
    GRN_OBJ_FIN(ctx, &flags);
  }

  {
    grn_obj *key_type = NULL;

    if (table->header.type != GRN_TABLE_NO_KEY &&
        table->header.domain != GRN_ID_NIL) {
      key_type = grn_ctx_at(ctx, table->header.domain);
    }
    if (key_type) {
      ADD_OBJECT_NAME("key_type", key_type);
    }
  }

  {
    grn_obj *value_type = NULL;
    grn_id range = GRN_ID_NIL;

    if (table->header.type != GRN_TABLE_DAT_KEY) {
      range = grn_obj_get_range(ctx, table);
    }
    if (range != GRN_ID_NIL) {
      value_type = grn_ctx_at(ctx, range);
    }
    if (value_type) {
      ADD_OBJECT_NAME("value_type", value_type);
    }
  }

  {
    grn_obj *tokenizer;
    tokenizer = grn_obj_get_info(ctx, table, GRN_INFO_DEFAULT_TOKENIZER, NULL);
    if (tokenizer) {
      ADD_OBJECT_NAME("default_tokenizer", tokenizer);
    }
  }

  {
    grn_obj *normalizer;
    normalizer = grn_obj_get_info(ctx, table, GRN_INFO_NORMALIZER, NULL);
    /* The legacy flag stands for NormalizerAuto when none is set. */
    if (!normalizer && (table->header.flags & GRN_OBJ_KEY_NORMALIZE)) {
      normalizer = grn_ctx_get(ctx, "NormalizerAuto", -1);
    }
    if (normalizer) {
      ADD_OBJECT_NAME("normalizer", normalizer);
    }
  }

  if (table->header.type != GRN_TABLE_NO_KEY) {
    grn_obj token_filters;
    int n;

    GRN_PTR_INIT(&token_filters, GRN_OBJ_VECTOR, GRN_DB_OBJECT);
    grn_obj_get_info(ctx, table, GRN_INFO_TOKEN_FILTERS, &token_filters);
    n = GRN_BULK_VSIZE(&token_filters) / sizeof(grn_obj *);
    if (n > 0) {
      grn_obj token_filter_names;
      int i;

      GRN_TEXT_INIT(&token_filter_names, 0);
      for (i = 0; i < n; i++) {
        grn_obj *token_filter;
        char name[GRN_TABLE_MAX_KEY_SIZE];
        int name_size;

        token_filter = GRN_PTR_VALUE_AT(&token_filters, i);
        name_size = grn_obj_name(ctx, token_filter,
                                 name, GRN_TABLE_MAX_KEY_SIZE);
        if (i > 0) {
          GRN_TEXT_PUTC(ctx, &token_filter_names, ',');
        }
        GRN_TEXT_PUT(ctx, &token_filter_names, name, name_size);
      }
      /* ADD() measures the value with strlen(), so terminate it first. */
      GRN_TEXT_PUTC(ctx, &token_filter_names, '\0');
      ADD("token_filters", GRN_TEXT_VALUE(&token_filter_names));
      GRN_OBJ_FIN(ctx, &token_filter_names);
    }
    GRN_OBJ_FIN(ctx, &token_filters);
  }

#undef ADD_OBJECT_NAME
#undef ADD
}