Example #1
0
File: com.c Project: iwaim/groonga
grn_rc
grn_com_event_init(grn_ctx *ctx, grn_com_event *ev, int max_nevents, int data_size)
{
    ev->max_nevents = max_nevents;
    if ((ev->hash = grn_hash_create(ctx, NULL, sizeof(grn_sock), data_size, 0))) {
        MUTEX_INIT(ev->mutex);
        COND_INIT(ev->cond);
        GRN_COM_QUEUE_INIT(&ev->recv_old);
        ev->msg_handler = NULL;
        memset(&(ev->curr_edge_id), 0, sizeof(grn_com_addr));
        ev->acceptor = NULL;
        ev->opaque = NULL;
#ifndef USE_SELECT
# ifdef USE_EPOLL
        if ((ev->events = GRN_MALLOC(sizeof(struct epoll_event) * max_nevents))) {
            if ((ev->epfd = epoll_create(max_nevents)) != -1) {
                goto exit;
            } else {
                SERR("epoll_create");
            }
            GRN_FREE(ev->events);
        }
# else /* USE_EPOLL */
#  ifdef USE_KQUEUE
        if ((ev->events = GRN_MALLOC(sizeof(struct kevent) * max_nevents))) {
            if ((ev->kqfd = kqueue()) != -1) {
                goto exit;
            } else {
                SERR("kqueue");
            }
            GRN_FREE(ev->events);
        }
#  else /* USE_KQUEUE */
        if ((ev->events = GRN_MALLOC(sizeof(struct pollfd) * max_nevents))) {
            goto exit;
        }
#  endif /* USE_KQUEUE*/
# endif /* USE_EPOLL */
        grn_hash_close(ctx, ev->hash);
        ev->hash = NULL;
        ev->events = NULL;
#else /* USE_SELECT */
        goto exit;
#endif /* USE_SELECT */
    }
exit :
    return ctx->rc;
}
Example #2
0
grn_query *
grn_query_open(grn_ctx *ctx, const char *str, unsigned int str_len,
               grn_operator default_op, int max_exprs)
{
  grn_query *q;
  int max_cells = max_exprs * 4;
  if (!(q = GRN_MALLOC(sizeof(grn_query) + max_cells * sizeof(grn_cell) + str_len + 1))) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_query_open malloc fail");
    return NULL;
  }
  q->header.type = GRN_QUERY;
  q->str = (char *)&q->cell_pool[max_cells];
  memcpy(q->str, str, str_len);
  q->str[str_len] = '\0';
  q->cur = q->str;
  q->str_end = q->str + str_len;
  q->default_op = default_op;
  q->encoding = ctx->encoding;
  q->max_exprs = max_exprs;
  q->max_cells = max_cells;
  q->cur_cell = 0;
  q->cur_expr = 0;
  q->escalation_threshold = GRN_DEFAULT_MATCH_ESCALATION_THRESHOLD;
  q->escalation_decaystep = DEFAULT_DECAYSTEP;
  q->weight_offset = 0;
  q->opt.weight_vector = NULL;
  q->weight_set = NULL;
  get_pragma(ctx, q);
  q->expr = get_expr(ctx, q);
  q->opt.vector_size = DEFAULT_WEIGHT_VECTOR_SIZE;
  q->opt.func = q->weight_set ? section_weight_cb : NULL;
  q->opt.func_arg = q->weight_set;
  q->snip_conds = NULL;
  return q;
}
Example #3
0
grn_obj *
grn_type_create(grn_ctx *ctx, const char *name, unsigned int name_size,
                grn_obj_flags flags, unsigned int size)
{
  grn_id id;
  struct _grn_type *res = NULL;
  grn_obj *db;
  if (!ctx || !ctx->impl || !(db = ctx->impl->db)) {
    ERR(GRN_INVALID_ARGUMENT, "db not initialized");
    return NULL;
  }
  GRN_API_ENTER;
  if (grn_db_check_name(ctx, name, name_size)) {
    GRN_DB_CHECK_NAME_ERR("[type][create]", name, name_size);
    GRN_API_RETURN(NULL);
  }
  if (!GRN_DB_P(db)) {
    ERR(GRN_INVALID_ARGUMENT, "invalid db assigned");
    GRN_API_RETURN(NULL);
  }
  id = grn_obj_register(ctx, db, name, name_size);
  if (id && (res = GRN_MALLOC(sizeof(grn_db_obj)))) {
    GRN_DB_OBJ_SET_TYPE(res, GRN_TYPE);
    res->obj.header.flags = flags;
    res->obj.header.domain = GRN_ID_NIL;
    GRN_TYPE_SIZE(&res->obj) = size;
    if (grn_db_obj_init(ctx, db, id, DB_OBJ(res))) {
      // grn_obj_delete(ctx, db, id);
      GRN_FREE(res);
      GRN_API_RETURN(NULL);
    }
  }
  GRN_API_RETURN((grn_obj *)res);
}
Example #4
0
static grn_rc
ngram_init(grn_ctx *ctx, grn_obj *table, grn_proc_data *user_data, uint8_t ngram_unit)
{
  grn_obj *str;
  int nflags = GRN_STR_REMOVEBLANK|GRN_STR_WITH_CTYPES;
  grn_ngram_tokenizer *token;
  grn_obj_flags table_flags;
  if (!(str = grn_ctx_pop(ctx))) { return GRN_INVALID_ARGUMENT; }
  if (!(token = GRN_MALLOC(sizeof(grn_ngram_tokenizer)))) { return ctx->rc; }
  user_data->ptr = token;
  token->uni_alpha = 1;
  token->uni_digit = 1;
  token->uni_symbol = 1;
  token->ngram_unit = ngram_unit;
  token->overlap = 0;
  token->pos = 0;
  token->skip = 0;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_str_open failed at grn_token_open");
    return GRN_TOKENIZER_ERROR;
  }
  token->next = (unsigned char *)token->nstr->norm;
  token->end = token->next + token->nstr->norm_blen;
  token->ctypes = token->nstr->ctypes;
  token->len = token->nstr->length;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  return GRN_SUCCESS;
}
Example #5
0
static grn_obj *
delimited_init(grn_ctx *ctx, grn_obj *table, grn_user_data *user_data,
               uint8_t *delimiter, uint32_t delimiter_len)
{
  grn_obj *str;
  int nflags = 0;
  grn_delimited_tokenizer *token;
  grn_obj_flags table_flags;
  if (!(str = grn_ctx_pop(ctx))) {
    ERR(GRN_INVALID_ARGUMENT, "missing argument");
    return NULL;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_delimited_tokenizer)))) { return NULL; }
  user_data->ptr = token;
  token->delimiter = delimiter;
  token->delimiter_len = delimiter_len;
  token->pos = 0;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_FREE(token);
    ERR(GRN_TOKENIZER_ERROR, "grn_str_open failed at grn_token_open");
    return NULL;
  }
  token->next = (unsigned char *)token->nstr->norm;
  token->end = token->next + token->nstr->norm_blen;
  token->len = token->nstr->length;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  return NULL;
}
Example #6
0
grn_dat *
grn_dat_open(grn_ctx *ctx, const char *path)
{
  if (path && (std::strlen(path) >= (PATH_MAX - (FILE_ID_LENGTH + 1)))) {
    ERR(GRN_FILENAME_TOO_LONG, "too long path");
    return NULL;
  }

  grn_dat * const dat = static_cast<grn_dat *>(GRN_MALLOC(sizeof(grn_dat)));
  if (!dat) {
    return NULL;
  }

  grn_dat_init(ctx, dat);
  dat->io = grn_io_open(ctx, path, grn_io_auto);
  if (!dat->io) {
    GRN_FREE(dat);
    return NULL;
  }

  dat->header = (struct grn_dat_header *)grn_io_header(dat->io);
  if (!dat->header) {
    grn_io_close(ctx, dat->io);
    GRN_FREE(dat);
    return NULL;
  }
  dat->file_id = dat->header->file_id;
  dat->encoding = dat->header->encoding;
  dat->obj.header.flags = dat->header->flags;
  dat->tokenizer = grn_ctx_at(ctx, dat->header->tokenizer);
  return dat;
}
Example #7
0
static grn_obj *
uvector_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *str, *flags, *mode;
  grn_uvector_tokenizer *tokenizer;
  if (!(flags = grn_ctx_pop(ctx))) {
    ERR(GRN_INVALID_ARGUMENT, "[tokenizer][uvector] missing argument: flags");
    return NULL;
  }
  if (!(str = grn_ctx_pop(ctx))) {
    ERR(GRN_INVALID_ARGUMENT, "[tokenizer][uvector] missing argument: string");
    return NULL;
  }
  if (!(mode = grn_ctx_pop(ctx))) {
    ERR(GRN_INVALID_ARGUMENT, "[tokenizer][uvector] missing argument: mode");
    return NULL;
  }
  if (!(tokenizer = GRN_MALLOC(sizeof(grn_uvector_tokenizer)))) {
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[tokenizer][uvector] "
        "memory allocation to grn_uvector_tokenizer failed");
    return NULL;
  }
  user_data->ptr = tokenizer;

  grn_tokenizer_token_init(ctx, &(tokenizer->token));
  tokenizer->curr = (byte *)GRN_TEXT_VALUE(str);
  tokenizer->tail = tokenizer->curr + GRN_TEXT_LEN(str);
  tokenizer->unit = sizeof(grn_id);
  return NULL;
}
Example #8
0
grn_scanner *
grn_scanner_open(grn_ctx *ctx,
                 grn_obj *expr,
                 grn_operator op,
                 grn_bool record_exist)
{
  grn_scanner *scanner;

  scanner = GRN_MALLOC(sizeof(grn_scanner));
  if (!scanner) {
    return NULL;
  }

  scanner->source_expr = expr;
  scanner->expr = grn_expr_rewrite(ctx, expr);
  if (!scanner->expr) {
    scanner->expr = expr;
  }

  scanner->sis = grn_scan_info_build(ctx,
                                     scanner->expr,
                                     &(scanner->n_sis),
                                     op,
                                     record_exist);
  if (!scanner->sis) {
    grn_scanner_close(ctx, scanner);
    return NULL;
  }

  return scanner;
}
Example #9
0
void
test_dynamic_malloc_change(void)
{
#ifdef USE_DYNAMIC_MALLOC_CHANGE
  cut_assert_ensue_context();
  {
    grn_ctx *ctx = context;

    memory = GRN_MALLOC(1);
    cut_assert_not_null(memory);
    GRN_FREE(memory);

    grn_ctx_set_malloc(ctx, malloc_always_fail);
    memory = GRN_MALLOC(1);
    cut_assert_null(memory);
  }
#endif
}
Example #10
0
File: ctx.c Project: ikdttr/groonga
static void
grn_ctx_impl_init(grn_ctx *ctx)
{
  if (!(ctx->impl = GRN_MALLOC(sizeof(struct _grn_ctx_impl)))) { return; }
  if (!(ctx->impl->segs = grn_io_anon_map(ctx, &ctx->impl->mi,
                                          sizeof(grn_io_mapinfo) * N_SEGMENTS))) {
    GRN_FREE(ctx->impl);
    ctx->impl = NULL;
    return;
  }
#ifdef USE_DYNAMIC_MALLOC_CHANGE
  grn_ctx_impl_init_malloc(ctx);
#endif
  if (!(ctx->impl->values = grn_array_create(ctx, NULL, sizeof(grn_tmp_db_obj),
                                             GRN_ARRAY_TINY))) {
    grn_io_anon_unmap(ctx, &ctx->impl->mi, sizeof(grn_io_mapinfo) * N_SEGMENTS);
    GRN_FREE(ctx->impl);
    ctx->impl = NULL;
    return;
  }
  ctx->impl->encoding = ctx->encoding;
  ctx->impl->lifoseg = -1;
  ctx->impl->currseg = -1;
  ctx->impl->db = NULL;

  ctx->impl->qe = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(void *), 0);
  ctx->impl->stack_curr = 0;

  ctx->impl->phs = NIL;
  ctx->impl->code = NIL;
  ctx->impl->dump = NIL;
  ctx->impl->op = GRN_OP_T0LVL;
  ctx->impl->args = NIL;
  ctx->impl->envir = NIL;
  ctx->impl->value = NIL;
  ctx->impl->ncells = 0;
  ctx->impl->n_entries = 0;
  ctx->impl->seqno = 0;
  ctx->impl->lseqno = 0;
  ctx->impl->nbinds = 0;
  ctx->impl->nunbinds = 0;
  ctx->impl->feed_mode = grn_ql_atonce;
  ctx->impl->cur = NULL;
  ctx->impl->str_end = NULL;
  ctx->impl->batchmode = 0;
  ctx->impl->gc_verbose = 0;
  ctx->impl->inbuf = NULL;
  ctx->impl->co.mode = 0;
  ctx->impl->co.func = NULL;
  ctx->impl->objects = NULL;
  ctx->impl->symbols = NULL;
  ctx->impl->com = NULL;
  ctx->impl->outbuf = grn_obj_open(ctx, GRN_BULK, 0, 0);
  GRN_TEXT_INIT(&ctx->impl->subbuf, 0);
}
Example #11
0
inline static char *
grn_snip_strndup(grn_ctx *ctx, const char *string, unsigned int string_len)
{
   char *copied_string;

   copied_string = GRN_MALLOC(string_len + 1);
   if (!copied_string) {
     return NULL;
   }
   memcpy(copied_string, string, string_len);
   copied_string[string_len]= '\0'; /* not required, but for ql use */
   return copied_string;
}
Example #12
0
static grn_rc
uvector_init(grn_ctx *ctx, grn_obj *table, grn_proc_data *user_data)
{
  grn_obj *str;
  grn_uvector_tokenizer_info *token;
  if (!(str = grn_ctx_pop(ctx))) { return GRN_INVALID_ARGUMENT; }
  if (!(token = GRN_MALLOC(sizeof(grn_uvector_tokenizer_info)))) { return ctx->rc; }
  user_data->ptr = token;
  token->curr = GRN_TEXT_VALUE(str);
  token->tail = token->curr + GRN_TEXT_LEN(str);
  token->unit = sizeof(grn_id);
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  return GRN_SUCCESS;
}
Example #13
0
static grn_obj *
ngram_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data, uint8_t ngram_unit,
           uint8_t uni_alpha, uint8_t uni_digit, uint8_t uni_symbol, uint8_t ignore_blank)
{
  unsigned int normalize_flags =
    GRN_STRING_REMOVE_BLANK |
    GRN_STRING_WITH_TYPES |
    GRN_STRING_REMOVE_TOKENIZED_DELIMITER;
  grn_tokenizer_query *query;
  const char *normalized;
  unsigned int normalized_length_in_bytes;
  grn_ngram_tokenizer *tokenizer;

  query = grn_tokenizer_query_open(ctx, nargs, args, normalize_flags);
  if (!query) {
    return NULL;
  }

  if (!(tokenizer = GRN_MALLOC(sizeof(grn_ngram_tokenizer)))) {
    grn_tokenizer_query_close(ctx, query);
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[tokenizer][ngram] "
        "memory allocation to grn_ngram_tokenizer failed");
    return NULL;
  }
  user_data->ptr = tokenizer;

  grn_tokenizer_token_init(ctx, &(tokenizer->token));
  tokenizer->query = query;

  tokenizer->uni_alpha = uni_alpha;
  tokenizer->uni_digit = uni_digit;
  tokenizer->uni_symbol = uni_symbol;
  tokenizer->ngram_unit = ngram_unit;
  tokenizer->ignore_blank = ignore_blank;
  tokenizer->overlap = 0;
  tokenizer->pos = 0;
  tokenizer->skip = 0;

  grn_string_get_normalized(ctx, tokenizer->query->normalized_query,
                            &normalized, &normalized_length_in_bytes,
                            &(tokenizer->len));
  tokenizer->next = (const unsigned char *)normalized;
  tokenizer->end = tokenizer->next + normalized_length_in_bytes;
  tokenizer->ctypes =
    grn_string_get_types(ctx, tokenizer->query->normalized_query);
  return NULL;
}
Example #14
0
static grn_obj *
regexp_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  unsigned int normalize_flags = GRN_STRING_WITH_TYPES;
  grn_tokenizer_query *query;
  const char *normalized;
  unsigned int normalized_length_in_bytes;
  grn_regexp_tokenizer *tokenizer;

  query = grn_tokenizer_query_open(ctx, nargs, args, normalize_flags);
  if (!query) {
    return NULL;
  }

  tokenizer = GRN_MALLOC(sizeof(grn_regexp_tokenizer));
  if (!tokenizer) {
    grn_tokenizer_query_close(ctx, query);
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[tokenizer][regexp] failed to allocate memory");
    return NULL;
  }
  user_data->ptr = tokenizer;

  grn_tokenizer_token_init(ctx, &(tokenizer->token));
  tokenizer->query = query;

  tokenizer->get.n_skip_tokens = 0;

  tokenizer->is_begin = GRN_TRUE;
  tokenizer->is_end   = GRN_FALSE;
  tokenizer->is_start_token = GRN_TRUE;
  tokenizer->is_overlapping = GRN_FALSE;

  grn_string_get_normalized(ctx, tokenizer->query->normalized_query,
                            &normalized, &normalized_length_in_bytes,
                            NULL);
  tokenizer->next = normalized;
  tokenizer->end = tokenizer->next + normalized_length_in_bytes;
  tokenizer->nth_char = 0;
  tokenizer->char_types =
    grn_string_get_types(ctx, tokenizer->query->normalized_query);

  GRN_TEXT_INIT(&(tokenizer->buffer), 0);

  return NULL;
}
Example #15
0
grn_dat *
grn_dat_create(grn_ctx *ctx, const char *path, uint32_t,
               uint32_t, uint32_t flags)
{
  if (path) {
    if (path[0] == '\0') {
      path = NULL;
    } else if (std::strlen(path) >= (PATH_MAX - (FILE_ID_LENGTH + 1))) {
      ERR(GRN_FILENAME_TOO_LONG, "too long path");
      return NULL;
    }
  }

  grn_dat * const dat = static_cast<grn_dat *>(GRN_MALLOC(sizeof(grn_dat)));
  if (!dat) {
    return NULL;
  }
  grn_dat_init(ctx, dat);
  dat->obj.header.flags = flags;

  dat->io = grn_io_create(ctx, path, sizeof(struct grn_dat_header),
                          4096, 0, grn_io_auto, GRN_IO_EXPIRE_SEGMENT);
  if (!dat->io) {
    GRN_FREE(dat);
    return NULL;
  }
  grn_io_set_type(dat->io, GRN_TABLE_DAT_KEY);

  dat->header = static_cast<struct grn_dat_header *>(grn_io_header(dat->io));
  if (!dat->header) {
    grn_io_close(ctx, dat->io);
    grn_dat_remove_file(ctx, path);
    GRN_FREE(dat);
    return NULL;
  }
  const grn_encoding encoding = (ctx->encoding != GRN_ENC_DEFAULT) ?
      ctx->encoding : grn_gctx.encoding;
  dat->header->flags = flags;
  dat->header->encoding = encoding;
  dat->header->tokenizer = GRN_ID_NIL;
  dat->header->file_id = 0;
  dat->encoding = encoding;
  dat->tokenizer = NULL;
  return dat;
}
Example #16
0
static grn_obj *
uvector_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *str;
  grn_uvector_tokenizer_info *token;
  if (!(str = grn_ctx_pop(ctx))) {
    ERR(GRN_INVALID_ARGUMENT, "missing argument");
    return NULL;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_uvector_tokenizer_info)))) { return NULL; }
  user_data->ptr = token;
  token->curr = GRN_TEXT_VALUE(str);
  token->tail = token->curr + GRN_TEXT_LEN(str);
  token->unit = sizeof(grn_id);
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  return NULL;
}
Example #17
0
static grn_obj *
delimited_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data,
               const uint8_t *delimiter, uint32_t delimiter_len)
{
  grn_tokenizer_query *query;
  unsigned int normalize_flags = 0;
  const char *normalized;
  unsigned int normalized_length_in_bytes;
  grn_delimited_tokenizer *tokenizer;

  query = grn_tokenizer_query_open(ctx, nargs, args, normalize_flags);
  if (!query) {
    return NULL;
  }

  if (!(tokenizer = GRN_MALLOC(sizeof(grn_delimited_tokenizer)))) {
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[tokenizer][delimit] "
        "memory allocation to grn_delimited_tokenizer failed");
    grn_tokenizer_query_close(ctx, query);
    return NULL;
  }
  user_data->ptr = tokenizer;

  tokenizer->query = query;

  tokenizer->have_tokenized_delimiter =
    grn_tokenizer_have_tokenized_delimiter(ctx,
                                           tokenizer->query->ptr,
                                           tokenizer->query->length,
                                           tokenizer->query->encoding);
  tokenizer->delimiter = delimiter;
  tokenizer->delimiter_len = delimiter_len;
  grn_string_get_normalized(ctx, tokenizer->query->normalized_query,
                            &normalized, &normalized_length_in_bytes,
                            NULL);
  tokenizer->next = (const unsigned char *)normalized;
  tokenizer->end = tokenizer->next + normalized_length_in_bytes;

  grn_tokenizer_token_init(ctx, &(tokenizer->token));

  return NULL;
}
Example #18
0
File: token.c Project: mooz/groonga
grn_token *
grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len,
               grn_token_mode mode)
{
  grn_token *token;
  grn_encoding encoding;
  grn_obj *tokenizer;
  if (grn_table_get_info(ctx, table, NULL, &encoding, &tokenizer)) { return NULL; }
  if (!(token = GRN_MALLOC(sizeof(grn_token)))) { return NULL; }
  token->table = table;
  token->mode = mode;
  token->encoding = encoding;
  token->tokenizer = tokenizer;
  token->orig = str;
  token->orig_blen = str_len;
  token->curr = NULL;
  token->curr_size = 0;
  token->pos = -1;
  token->status = grn_token_doing;
  token->force_prefix = 0;
  if (tokenizer) {
    grn_obj str_;
    GRN_TEXT_INIT(&str_, GRN_OBJ_DO_SHALLOW_COPY);
    GRN_TEXT_SET_REF(&str_, str, str_len);
    token->pctx.caller = NULL;
    token->pctx.user_data.ptr = NULL;
    token->pctx.proc = (grn_proc *)tokenizer;
    token->pctx.hooks = NULL;
    token->pctx.currh = NULL;
    token->pctx.phase = PROC_INIT;
    grn_ctx_push(ctx, &str_);
    ((grn_proc *)tokenizer)->funcs[PROC_INIT](ctx, 1, &table, &token->pctx.user_data);
    grn_obj_close(ctx, &str_);
  }
  if (ctx->rc) {
    GRN_FREE(token);
    token = NULL;
  }
  return token;
}
Example #19
0
static grn_obj *
ngram_init(grn_ctx *ctx, grn_obj *table, grn_user_data *user_data, uint8_t ngram_unit,
           uint8_t uni_alpha, uint8_t uni_digit, uint8_t uni_symbol, uint8_t ignore_blank)
{
  grn_obj *str;
  int nflags = GRN_STR_REMOVEBLANK|GRN_STR_WITH_CTYPES;
  grn_ngram_tokenizer *token;
  grn_obj_flags table_flags;
  if (!(str = grn_ctx_pop(ctx))) {
    ERR(GRN_INVALID_ARGUMENT, "missing argument");
    return NULL;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_ngram_tokenizer)))) { return NULL; }
  user_data->ptr = token;
  token->uni_alpha = uni_alpha;
  token->uni_digit = uni_digit;
  token->uni_symbol = uni_symbol;
  token->ngram_unit = ngram_unit;
  token->ignore_blank = ignore_blank;
  token->overlap = 0;
  token->pos = 0;
  token->skip = 0;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_FREE(token);
    ERR(GRN_TOKENIZER_ERROR, "grn_str_open failed at grn_token_open");
    return NULL;
  }
  token->next = (unsigned char *)token->nstr->norm;
  token->end = token->next + token->nstr->norm_blen;
  token->ctypes = token->nstr->ctypes;
  token->len = token->nstr->length;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  return NULL;
}
Example #20
0
grn_command_input *
grn_command_input_open(grn_ctx *ctx, grn_obj *command)
{
  grn_command_input *input = NULL;

  GRN_API_ENTER;
  input = GRN_MALLOC(sizeof(grn_command_input));
  if (!input) {
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[command-input] failed to allocate grn_command_input");
    goto exit;
  }

  input->command = command;
  /* TODO: Allocate by self. */
  {
    uint32_t n;
    input->arguments = grn_expr_get_vars(ctx, input->command, &n);
  }

exit :
  GRN_API_RETURN(input);
}
Example #21
0
inline static grn_obj *
eucjp_normalize(grn_ctx *ctx, int nargs, grn_obj **args,
                grn_user_data *user_data)
{
  static uint16_t hankana[] = {
    0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa5f2, 0xa5a1, 0xa5a3,
    0xa5a5, 0xa5a7, 0xa5a9, 0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3, 0xa1bc, 0xa5a2,
    0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa, 0xa5ab, 0xa5ad, 0xa5af, 0xa5b1, 0xa5b3,
    0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd, 0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6,
    0xa5c8, 0xa5ca, 0xa5cb, 0xa5cc, 0xa5cd, 0xa5ce, 0xa5cf, 0xa5d2, 0xa5d5,
    0xa5d8, 0xa5db, 0xa5de, 0xa5df, 0xa5e0, 0xa5e1, 0xa5e2, 0xa5e4, 0xa5e6,
    0xa5e8, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5ef, 0xa5f3, 0xa1ab,
    0xa1eb
  };
  static unsigned char dakuten[] = {
    0xf4, 0, 0, 0, 0, 0xac, 0, 0xae, 0, 0xb0, 0, 0xb2, 0, 0xb4, 0, 0xb6, 0,
    0xb8, 0, 0xba, 0, 0xbc, 0, 0xbe, 0, 0xc0, 0, 0xc2, 0, 0, 0xc5, 0, 0xc7,
    0, 0xc9, 0, 0, 0, 0, 0, 0, 0xd0, 0, 0, 0xd3, 0, 0, 0xd6, 0, 0, 0xd9, 0,
    0, 0xdc
  };
  static unsigned char handaku[] = {
    0xd1, 0, 0, 0xd4, 0, 0, 0xd7, 0, 0, 0xda, 0, 0, 0xdd
  };
  grn_string *nstr = (grn_string *)args[0];
  int16_t *ch;
  const unsigned char *s, *s_, *e;
  unsigned char *d, *d0, *d_, b;
  uint_least8_t *cp, *ctypes, ctype;
  size_t size = nstr->original_length_in_bytes, length = 0;
  int removeblankp = nstr->flags & GRN_STRING_REMOVE_BLANK;
  if (!(nstr->normalized = GRN_MALLOC(size * 2 + 1))) {
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[strinig][eucjp] failed to allocate normalized text space");
    return NULL;
  }
  d0 = (unsigned char *) nstr->normalized;
  if (nstr->flags & GRN_STRING_WITH_CHECKS) {
    if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) {
      GRN_FREE(nstr->normalized);
      nstr->normalized = NULL;
      ERR(GRN_NO_MEMORY_AVAILABLE,
          "[strinig][eucjp] failed to allocate checks space");
      return NULL;
    }
  }
  ch = nstr->checks;
  if (nstr->flags & GRN_STRING_WITH_TYPES) {
    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
      GRN_FREE(nstr->checks);
      GRN_FREE(nstr->normalized);
      nstr->checks = NULL;
      nstr->normalized = NULL;
      ERR(GRN_NO_MEMORY_AVAILABLE,
          "[strinig][eucjp] failed to allocate character types space");
      return NULL;
    }
  }
  cp = ctypes = nstr->ctypes;
  e = (unsigned char *)nstr->original + size;
  for (s = s_ = (unsigned char *) nstr->original, d = d_ = d0; s < e; s++) {
    if ((*s & 0x80)) {
      if (((s + 1) < e) && (*(s + 1) & 0x80)) {
        unsigned char c1 = *s++, c2 = *s, c3 = 0;
        switch (c1 >> 4) {
        case 0x08 :
          if (c1 == 0x8e && 0xa0 <= c2 && c2 <= 0xdf) {
            uint16_t c = hankana[c2 - 0xa0];
            switch (c) {
            case 0xa1ab :
              if (d > d0 + 1 && d[-2] == 0xa5
                  && 0xa6 <= d[-1] && d[-1] <= 0xdb && (b = dakuten[d[-1] - 0xa6])) {
                *(d - 1) = b;
                if (ch) { ch[-1] += 2; s_ += 2; }
                continue;
              } else {
                *d++ = c >> 8; *d = c & 0xff;
              }
              break;
            case 0xa1eb :
              if (d > d0 + 1 && d[-2] == 0xa5
                  && 0xcf <= d[-1] && d[-1] <= 0xdb && (b = handaku[d[-1] - 0xcf])) {
                *(d - 1) = b;
                if (ch) { ch[-1] += 2; s_ += 2; }
                continue;
              } else {
                *d++ = c >> 8; *d = c & 0xff;
              }
              break;
            default :
              *d++ = c >> 8; *d = c & 0xff;
              break;
            }
            ctype = grn_char_katakana;
          } else {
            *d++ = c1; *d = c2;
            ctype = grn_char_others;
          }
          break;
        case 0x09 :
          *d++ = c1; *d = c2;
          ctype = grn_char_others;
          break;
        case 0x0a :
          switch (c1 & 0x0f) {
          case 1 :
            switch (c2) {
            case 0xbc :
              *d++ = c1; *d = c2;
              ctype = grn_char_katakana;
              break;
            case 0xb9 :
              *d++ = c1; *d = c2;
              ctype = grn_char_kanji;
              break;
            case 0xa1 :
              if (removeblankp) {
                if (cp > ctypes) { *(cp - 1) |= GRN_CHAR_BLANK; }
                continue;
              } else {
                *d = ' ';
                ctype = GRN_CHAR_BLANK|grn_char_symbol;
              }
              break;
            default :
              if (c2 >= 0xa4 && (c3 = symbol[c2 - 0xa4])) {
                *d = c3;
                ctype = grn_char_symbol;
              } else {
                *d++ = c1; *d = c2;
                ctype = grn_char_others;
              }
              break;
            }
            break;
          case 2 :
            *d++ = c1; *d = c2;
            ctype = grn_char_symbol;
            break;
          case 3 :
            c3 = c2 - 0x80;
            if ('a' <= c3 && c3 <= 'z') {
              ctype = grn_char_alpha;
              *d = c3;
            } else if ('A' <= c3 && c3 <= 'Z') {
              ctype = grn_char_alpha;
              *d = c3 + 0x20;
            } else if ('0' <= c3 && c3 <= '9') {
              ctype = grn_char_digit;
              *d = c3;
            } else {
              ctype = grn_char_others;
              *d++ = c1; *d = c2;
            }
            break;
          case 4 :
            *d++ = c1; *d = c2;
            ctype = grn_char_hiragana;
            break;
          case 5 :
            *d++ = c1; *d = c2;
            ctype = grn_char_katakana;
            break;
          case 6 :
          case 7 :
          case 8 :
            *d++ = c1; *d = c2;
            ctype = grn_char_symbol;
            break;
          default :
            *d++ = c1; *d = c2;
            ctype = grn_char_others;
            break;
          }
          break;
        default :
          *d++ = c1; *d = c2;
          ctype = grn_char_kanji;
          break;
        }
      } else {
Example #22
0
grn_dat_cursor *
grn_dat_cursor_open(grn_ctx *ctx, grn_dat *dat,
                    const void *min, unsigned int min_size,
                    const void *max, unsigned int max_size,
                    int offset, int limit, int flags)
{
  if (!grn_dat_open_trie_if_needed(ctx, dat)) {
    return NULL;
  }

  grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie);
  if (!trie) {
    grn_dat_cursor * const dc =
        static_cast<grn_dat_cursor *>(GRN_MALLOC(sizeof(grn_dat_cursor)));
    if (dc) {
      grn_dat_cursor_init(ctx, dc);
    }
    return dc;
  }

  grn_dat_cursor * const dc =
      static_cast<grn_dat_cursor *>(GRN_MALLOC(sizeof(grn_dat_cursor)));
  if (!dc) {
    return NULL;
  }
  grn_dat_cursor_init(ctx, dc);

  try {
    if ((flags & GRN_CURSOR_BY_ID) != 0) {
      dc->cursor = grn::dat::CursorFactory::open(*trie,
          min, min_size, max, max_size, offset, limit,
          grn::dat::ID_RANGE_CURSOR |
          ((flags & GRN_CURSOR_DESCENDING) ? grn::dat::DESCENDING_CURSOR : 0) |
          ((flags & GRN_CURSOR_GT) ? grn::dat::EXCEPT_LOWER_BOUND : 0) |
          ((flags & GRN_CURSOR_LT) ? grn::dat::EXCEPT_UPPER_BOUND : 0));
    } else if ((flags & GRN_CURSOR_PREFIX) != 0) {
      if (max && max_size) {
        if ((dat->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) != 0) {
          dc->cursor = grn::dat::CursorFactory::open(*trie,
              NULL, min_size, max, max_size, offset, limit,
              grn::dat::PREFIX_CURSOR | grn::dat::DESCENDING_CURSOR);
        } else {
          // TODO: near
        }
      } else if (min && min_size) {
        if ((flags & GRN_CURSOR_RK) != 0) {
          // TODO: rk search
        } else {
          dc->cursor = grn::dat::CursorFactory::open(*trie,
              min, min_size, NULL, 0, offset, limit,
              grn::dat::PREDICTIVE_CURSOR |
              ((flags & GRN_CURSOR_DESCENDING) ? grn::dat::DESCENDING_CURSOR : 0) |
              ((flags & GRN_CURSOR_GT) ? grn::dat::EXCEPT_EXACT_MATCH : 0));
        }
      }
    } else {
      dc->cursor = grn::dat::CursorFactory::open(*trie,
          min, min_size, max, max_size, offset, limit,
          grn::dat::KEY_RANGE_CURSOR |
          ((flags & GRN_CURSOR_DESCENDING) ? grn::dat::DESCENDING_CURSOR : 0) |
          ((flags & GRN_CURSOR_GT) ? grn::dat::EXCEPT_LOWER_BOUND : 0) |
          ((flags & GRN_CURSOR_LT) ? grn::dat::EXCEPT_UPPER_BOUND : 0));
    }
  } catch (const grn::dat::Exception &ex) {
    ERR(grn_dat_translate_error_code(ex.code()),
        "grn::dat::CursorFactory::open failed");
    GRN_FREE(dc);
    return NULL;
  }
  if (!dc->cursor) {
    ERR(GRN_INVALID_ARGUMENT, "unsupported query");
    GRN_FREE(dc);
    return NULL;
  }
  dc->dat = dat;
  return dc;
}
Example #23
0
grn_token *
grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len,
               grn_token_mode mode, unsigned int flags)
{
  grn_token *token;
  grn_encoding encoding;
  grn_obj *tokenizer;
  grn_obj *normalizer;
  grn_obj_flags table_flags;
  if (grn_table_get_info(ctx, table, &table_flags, &encoding, &tokenizer,
                         &normalizer)) {
    return NULL;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_token)))) { return NULL; }
  token->table = table;
  token->mode = mode;
  token->encoding = encoding;
  token->tokenizer = tokenizer;
  token->orig = str;
  token->orig_blen = str_len;
  token->curr = NULL;
  token->nstr = NULL;
  token->curr_size = 0;
  token->pos = -1;
  token->status = GRN_TOKEN_DOING;
  token->force_prefix = 0;
  if (tokenizer) {
    grn_obj str_, flags_;
    GRN_TEXT_INIT(&str_, GRN_OBJ_DO_SHALLOW_COPY);
    GRN_TEXT_SET_REF(&str_, str, str_len);
    GRN_UINT32_INIT(&flags_, 0);
    GRN_UINT32_SET(ctx, &flags_, flags);
    token->pctx.caller = NULL;
    token->pctx.user_data.ptr = NULL;
    token->pctx.proc = (grn_proc *)tokenizer;
    token->pctx.hooks = NULL;
    token->pctx.currh = NULL;
    token->pctx.phase = PROC_INIT;
    grn_ctx_push(ctx, &str_);
    grn_ctx_push(ctx, &flags_);
    ((grn_proc *)tokenizer)->funcs[PROC_INIT](ctx, 1, &table, &token->pctx.user_data);
    grn_obj_close(ctx, &flags_);
    grn_obj_close(ctx, &str_);
  } else {
    int nflags = 0;
    token->nstr = grn_string_open_(ctx, str, str_len,
                                   normalizer, nflags, token->encoding);
    if (token->nstr) {
      const char *normalized;
      grn_string_get_normalized(ctx, token->nstr,
                                &normalized, &(token->curr_size), NULL);
      token->curr = (const unsigned char *)normalized;
    } else {
      ERR(GRN_TOKENIZER_ERROR, "grn_string_open failed at grn_token_open");
    }
  }
  if (ctx->rc) {
    grn_token_close(ctx, token);
    token = NULL;
  }
  return token;
}
Example #24
0
static grn_rc
mecab_init(grn_ctx *ctx, grn_obj *table, grn_proc_data *user_data)
{
  grn_obj *str;
  int nflags = 0;
  char *buf, *s, *p;
  char mecab_err[256];
  grn_obj_flags table_flags;
  grn_mecab_tokenizer *token;
  unsigned int bufsize, maxtrial = 10, len;
  if (!(str = grn_ctx_pop(ctx))) { return GRN_INVALID_ARGUMENT; }
  SOLE_MECAB_CONFIRM;
  if (!sole_mecab) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "mecab_new failed on grn_mecab_init");
    return GRN_TOKENIZER_ERROR;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_mecab_tokenizer)))) { return ctx->rc; }
  user_data->ptr = token;
  token->mecab = sole_mecab;
  // if (!(token->mecab = mecab_new3())) {
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_str_open failed at grn_token_open");
    return GRN_TOKENIZER_ERROR;
  }
  len = token->nstr->norm_blen;
  mecab_err[sizeof(mecab_err) - 1] = '\0';
  for (bufsize = len * 2 + 1; maxtrial; bufsize *= 2, maxtrial--) {
    if(!(buf = GRN_MALLOC(bufsize + 1))) {
      GRN_LOG(ctx, GRN_LOG_ALERT, "buffer allocation on mecab_init failed !");
      GRN_FREE(token);
      return ctx->rc;
    }
    MUTEX_LOCK(sole_mecab_lock);
    s = mecab_sparse_tostr3(token->mecab, token->nstr->norm, len, buf, bufsize);
    if (!s) {
      strncpy(mecab_err, mecab_strerror(token->mecab), sizeof(mecab_err) - 1);
    }
    MUTEX_UNLOCK(sole_mecab_lock);
    if (s) { break; }
    GRN_FREE(buf);
    if (strstr(mecab_err, "output buffer overflow") == NULL) { break; }
  }
  if (!s) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "mecab_sparse_tostr failed len=%d bufsize=%d err=%s",
            len, bufsize, mecab_err);
    GRN_FREE(token);
    return GRN_TOKENIZER_ERROR;
  }
  // certain version of mecab returns trailing lf or spaces.
  for (p = buf + strlen(buf) - 1;
       buf <= p && (*p == '\n' || isspace(*(unsigned char *)p));
       p--) { *p = '\0'; }
  //grn_log("sparsed='%s'", s);
  token->buf = (unsigned char *)buf;
  token->next = (unsigned char *)buf;
  token->end = (unsigned char *)buf + strlen(buf);
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  return GRN_SUCCESS;
}
Example #25
0
grn_snip *
grn_snip_open(grn_ctx *ctx, int flags, unsigned int width,
              unsigned int max_results,
              const char *defaultopentag, unsigned int defaultopentag_len,
              const char *defaultclosetag, unsigned int defaultclosetag_len,
              grn_snip_mapping *mapping)
{
  int copy_tag;
  grn_snip *ret = NULL;
  if (!(ret = GRN_MALLOC(sizeof(grn_snip)))) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_snip allocation failed on grn_snip_open");
    return NULL;
  }
  if (max_results > MAX_SNIP_RESULT_COUNT || max_results == 0) {
    GRN_LOG(ctx, GRN_LOG_WARNING, "max_results is invalid on grn_snip_open");
    GRN_FREE(ret);
    return NULL;
  }
  GRN_API_ENTER;
  ret->encoding = ctx->encoding;
  ret->flags = flags;
  ret->width = width;
  ret->max_results = max_results;
  ret->defaultopentag = NULL;
  ret->defaultclosetag = NULL;

  copy_tag = flags & GRN_SNIP_COPY_TAG;
  if (grn_snip_set_default_tag(ctx,
                               &(ret->defaultopentag),
                               &(ret->defaultopentag_len),
                               defaultopentag, defaultopentag_len,
                               copy_tag)) {
    GRN_FREE(ret);
    GRN_API_RETURN(NULL);
  }

  if (grn_snip_set_default_tag(ctx,
                               &(ret->defaultclosetag),
                               &(ret->defaultclosetag_len),
                               defaultclosetag, defaultclosetag_len,
                               copy_tag)) {
    if (copy_tag && ret->defaultopentag) {
      GRN_FREE((void *)ret->defaultopentag);
    }
    GRN_FREE(ret);
    GRN_API_RETURN(NULL);
  }

  ret->cond_len = 0;
  ret->mapping = mapping;
  ret->nstr = NULL;
  ret->tag_count = 0;
  ret->snip_count = 0;
  if (ret->flags & GRN_SNIP_NORMALIZE) {
    ret->normalizer = GRN_NORMALIZER_AUTO;
  } else {
    ret->normalizer = NULL;
  }

  GRN_DB_OBJ_SET_TYPE(ret, GRN_SNIP);
  {
    grn_obj *db;
    grn_id id;
    db = grn_ctx_db(ctx);
    id = grn_obj_register(ctx, db, NULL, 0);
    DB_OBJ(ret)->header.domain = GRN_ID_NIL;
    DB_OBJ(ret)->range = GRN_ID_NIL;
    grn_db_obj_init(ctx, db, id, DB_OBJ(ret));
  }

  GRN_API_RETURN(ret);
}
Example #26
0
/*
  This function is called for a full text search query or a document to be
  indexed. This means that both short/long strings are given.
  The return value of this function is ignored. When an error occurs in this
  function, `ctx->rc' is overwritten with an error code (not GRN_SUCCESS).
 */
static grn_obj *
mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *str;
  int nflags = 0;
  char *buf, *p;
  const char *s;
  grn_obj *table = args[0];
  grn_obj_flags table_flags;
  grn_encoding table_encoding;
  grn_mecab_tokenizer *token;
  unsigned int bufsize, len;
  if (!(str = grn_ctx_pop(ctx))) {
    ERR(GRN_INVALID_ARGUMENT, "missing argument");
    return NULL;
  }
  if (!sole_mecab) {
    CRITICAL_SECTION_ENTER(sole_mecab_lock);
    if (!sole_mecab) {
      sole_mecab = mecab_new2("-Owakati");
      if (!sole_mecab) {
        ERR(GRN_TOKENIZER_ERROR, "mecab_new2 failed on grn_mecab_init: %s",
            mecab_strerror(NULL));
      } else {
        sole_mecab_encoding = get_mecab_encoding(sole_mecab);
      }
    }
    CRITICAL_SECTION_LEAVE(sole_mecab_lock);
  }
  if (!sole_mecab) {
    return NULL;
  }
  grn_table_get_info(ctx, table, &table_flags, &table_encoding, NULL);
  if (table_encoding != sole_mecab_encoding) {
    ERR(GRN_TOKENIZER_ERROR,
        "MeCab dictionary charset (%s) does not match the context encoding: <%s>",
        grn_enctostr(sole_mecab_encoding), grn_enctostr(table_encoding));
    return NULL;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_mecab_tokenizer)))) { return NULL; }
  token->mecab = sole_mecab;
  token->encoding = table_encoding;
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_FREE(token);
    ERR(GRN_TOKENIZER_ERROR, "grn_str_open failed at grn_token_open");
    return NULL;
  }
  len = token->nstr->norm_blen;
  CRITICAL_SECTION_ENTER(sole_mecab_lock);
  s = mecab_sparse_tostr2(token->mecab, token->nstr->norm, len);
  if (!s) {
    ERR(GRN_TOKENIZER_ERROR, "mecab_sparse_tostr failed len=%d err=%s",
        len, mecab_strerror(token->mecab));
  } else {
    bufsize = strlen(s) + 1;
    if (!(buf = GRN_MALLOC(bufsize))) {
      GRN_LOG(ctx, GRN_LOG_ALERT, "buffer allocation on mecab_init failed !");
    } else {
      memcpy(buf, s, bufsize);
    }
  }
  CRITICAL_SECTION_LEAVE(sole_mecab_lock);
  if (!s || !buf) {
    grn_str_close(ctx, token->nstr);
    GRN_FREE(token);
    return NULL;
  }
  /* A certain version of mecab returns trailing lf or spaces. */
  for (p = buf + bufsize - 2;
       buf <= p && isspace(*(unsigned char *)p);
       p--) { *p = '\0'; }
  user_data->ptr = token;
  token->buf = buf;
  token->next = buf;
  token->end = p + 1;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  return NULL;
}
Example #27
0
static int
do_client()
{
    int rc = -1;
    char *buf;
    grn_thread thread;
    struct timeval tvb, tve;
    grn_com_header sheader;
    grn_ctx ctx_, *ctx = &ctx_;
    grn_ctx_init(ctx, 0);
    GRN_COM_QUEUE_INIT(&fsessions);
    sessions = grn_hash_create(ctx, NULL, sizeof(grn_sock), sizeof(session), 0);
    sheader.proto = GRN_COM_PROTO_GQTP;
    sheader.qtype = 0;
    sheader.keylen = 0;
    sheader.level = 0;
    sheader.flags = 0;
    sheader.status = 0;
    sheader.opaque = 0;
    sheader.cas = 0;
    if ((buf = GRN_MALLOC(BUFSIZE))) {
        if (!grn_com_event_init(ctx, &ev, 1000, sizeof(grn_com))) {
            ev.msg_handler = msg_handler;
            if (!THREAD_CREATE(thread, receiver, NULL)) {
                int cnt = 0;
                gettimeofday(&tvb, NULL);
                lprint(ctx, "begin: max_concurrency=%d max_tp=%d", max_con, max_tp);
                while (fgets(buf, BUFSIZE, stdin)) {
                    uint32_t size = strlen(buf) - 1;
                    session *s = session_alloc(ctx, dests + (cnt++ % dest_cnt));
                    if (s) {
                        gettimeofday(&s->tv, NULL);
                        s->n_query++;
                        s->query_id = ++nsent;
                        s->n_sessions = (nsent - nrecv);
                        switch (proto) {
                        case 'H' :
                        case 'h' :
                            if (grn_com_send_text(ctx, s->com, buf, size, 0)) {
                                fprintf(stderr, "grn_com_send_text failed\n");
                            }
                            s->stat = 2;
                            /*
                            lprint(ctx, "sent %04d %04d %d",
                                   s->n_query, s->query_id, s->com->fd);
                            */
                            break;
                        default :
                            if (grn_com_send(ctx, s->com, &sheader, buf, size, 0)) {
                                fprintf(stderr, "grn_com_send failed\n");
                            }
                            break;
                        }
                    } else {
                        fprintf(stderr, "grn_com_copen failed\n");
                    }
                    for (;;) {
                        gettimeofday(&tve, NULL);
                        if ((nrecv < max_tp * (tve.tv_sec - tvb.tv_sec)) &&
                                (nsent - nrecv) < max_con) {
                            break;
                        }
                        /* lprint(ctx, "s:%d r:%d", nsent, nrecv); */
                        usleep(1000);
                    }
                    if (!(nsent % 1000)) {
                        lprint(ctx, "     : %d", nsent);
                    }
                }
                done = 1;
                pthread_join(thread, NULL);
                gettimeofday(&tve, NULL);
                {
                    double qps;
                    uint64_t etime = (tve.tv_sec - tvb.tv_sec);
                    etime *= 1000000;
                    etime += (tve.tv_usec - tvb.tv_usec);
                    qps = (double)nsent * 1000000 / etime;
                    lprint(ctx, "end  : n=%d min=%d max=%d avg=%d qps=%f etime=%d.%06d", nsent, etime_min, etime_max, (int)(etime_amount / nsent), qps, etime / 1000000, etime % 1000000);
                }
                {
                    session *s;
                    GRN_HASH_EACH(ctx, sessions, id, NULL, NULL, &s, {
                        session_close(ctx, s);
                    });
                }
                rc = 0;
            } else {
Example #28
0
static grn_obj *
regexp_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  unsigned int normalize_flags = 0;
  grn_tokenizer_query *query;
  const char *normalized;
  unsigned int normalized_length_in_bytes;
  grn_regexp_tokenizer *tokenizer;

  query = grn_tokenizer_query_open(ctx, nargs, args, normalize_flags);
  if (!query) {
    return NULL;
  }

  tokenizer = GRN_MALLOC(sizeof(grn_regexp_tokenizer));
  if (!tokenizer) {
    grn_tokenizer_query_close(ctx, query);
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[tokenizer][regexp] failed to allocate memory");
    return NULL;
  }
  user_data->ptr = tokenizer;

  grn_tokenizer_token_init(ctx, &(tokenizer->token));
  tokenizer->query = query;

  tokenizer->get.have_begin = GRN_FALSE;
  tokenizer->get.have_end   = GRN_FALSE;
  tokenizer->get.n_skip_tokens = 0;

  tokenizer->is_begin = GRN_TRUE;
  tokenizer->is_end   = GRN_FALSE;
  tokenizer->is_first_token = GRN_TRUE;
  tokenizer->is_overlapping = GRN_FALSE;

  grn_string_get_normalized(ctx, tokenizer->query->normalized_query,
                            &normalized, &normalized_length_in_bytes,
                            NULL);
  tokenizer->next = normalized;
  tokenizer->end = tokenizer->next + normalized_length_in_bytes;

  if (tokenizer->query->tokenize_mode == GRN_TOKEN_GET) {
    unsigned int query_length = tokenizer->query->length;
    if (query_length >= 2) {
      const char *query_string = tokenizer->query->ptr;
      grn_encoding encoding = tokenizer->query->encoding;
      if (query_string[0] == '\\' && query_string[1] == 'A') {
        tokenizer->get.have_begin = GRN_TRUE;
        /* TODO: It assumes that both "\\" and "A" are normalized to 1
           characters. Normalizer may omit character or expand to
           multiple characters. */
        tokenizer->next += grn_charlen_(ctx, tokenizer->next, tokenizer->end,
                                        encoding);
        tokenizer->next += grn_charlen_(ctx, tokenizer->next, tokenizer->end,
                                        encoding);
      }
      if (query_string[query_length - 2] == '\\' &&
          query_string[query_length - 1] == 'z') {
        tokenizer->get.have_end = GRN_TRUE;
        /* TODO: It assumes that both "\\" and "z" are normalized to 1
           byte characters. Normalizer may omit character or expand to
           multiple characters. */
        tokenizer->end -= grn_charlen_(ctx,
                                       tokenizer->end - 1,
                                       tokenizer->end,
                                       encoding);
        tokenizer->end -= grn_charlen_(ctx,
                                       tokenizer->end - 1,
                                       tokenizer->end,
                                       encoding);
      }
    }
  }

  GRN_TEXT_INIT(&(tokenizer->buffer), 0);

  return NULL;
}
static grn_string *
grn_fake_string_open(grn_ctx *ctx, grn_string *string)
{
  /* TODO: support GRN_STRING_REMOVE_BLANK flag and ctypes */
  grn_string *nstr = string;
  const char *str;
  unsigned int str_len;

  str = nstr->original;
  str_len = nstr->original_length_in_bytes;

  if (!(nstr->normalized = GRN_MALLOC(str_len + 1))) {
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[strinig][fake] failed to allocate normalized text space");
    grn_string_close(ctx, (grn_obj *)nstr);
    return NULL;
  }

  if (nstr->flags & GRN_STRING_REMOVE_TOKENIZED_DELIMITER &&
      ctx->encoding == GRN_ENC_UTF8) {
    int char_length;
    const char *source_current = str;
    const char *source_end = str + str_len;
    char *destination = nstr->normalized;
    unsigned int destination_length = 0;
    while ((char_length = grn_charlen(ctx, source_current, source_end)) > 0) {
      if (!grn_tokenizer_is_tokenized_delimiter(ctx,
                                                source_current, char_length,
                                                ctx->encoding)) {
        memcpy(destination, source_current, char_length);
        destination += char_length;
        destination_length += char_length;
      }
      source_current += char_length;
    }
    nstr->normalized[destination_length] = '\0';
    nstr->normalized_length_in_bytes = destination_length;
  } else {
    memcpy(nstr->normalized, str, str_len);
    nstr->normalized[str_len] = '\0';
    nstr->normalized_length_in_bytes = str_len;
  }

  if (nstr->flags & GRN_STRING_WITH_CHECKS) {
    int16_t f = 0;
    unsigned char c;
    size_t i;
    if (!(nstr->checks = (int16_t *) GRN_MALLOC(sizeof(int16_t) * str_len))) {
      grn_string_close(ctx, (grn_obj *)nstr);
      ERR(GRN_NO_MEMORY_AVAILABLE,
          "[strinig][fake] failed to allocate checks space");
      return NULL;
    }
    switch (nstr->encoding) {
    case GRN_ENC_EUC_JP:
      for (i = 0; i < str_len; i++) {
        if (!f) {
          c = (unsigned char) str[i];
          f = ((c >= 0xa1U && c <= 0xfeU) || c == 0x8eU ? 2 : (c == 0x8fU ? 3 : 1)
            );
          nstr->checks[i] = f;
        } else {
          nstr->checks[i] = 0;
        }
        f--;
      }
      break;
    case GRN_ENC_SJIS:
      for (i = 0; i < str_len; i++) {
        if (!f) {
          c = (unsigned char) str[i];
          f = (c >= 0x81U && ((c <= 0x9fU) || (c >= 0xe0U && c <= 0xfcU)) ? 2 : 1);
          nstr->checks[i] = f;
        } else {
          nstr->checks[i] = 0;
        }
        f--;
      }
      break;
    case GRN_ENC_UTF8:
      for (i = 0; i < str_len; i++) {
        if (!f) {
          c = (unsigned char) str[i];
          f = (c & 0x80U ? (c & 0x20U ? (c & 0x10U ? 4 : 3)
                           : 2)
               : 1);
          nstr->checks[i] = f;
        } else {
          nstr->checks[i] = 0;
        }
        f--;
      }
      break;
    default:
      for (i = 0; i < str_len; i++) {
        nstr->checks[i] = 1;
      }
      break;
    }
  }
  return nstr;
}