/*
 * Initialize the event dispatcher `ev`.
 *
 * Stores `max_nevents`, creates the hash keyed by grn_sock (with
 * `data_size` bytes of value per entry), initializes the mutex, the
 * condition variable and the receive queue, and clears the handler
 * fields.  Unless USE_SELECT is defined it then allocates the event array
 * for the compiled-in backend (epoll, kqueue, or poll) and, for epoll and
 * kqueue, opens the backend descriptor.
 *
 * If the backend set-up fails the hash is closed again and `ev->hash` and
 * `ev->events` are reset to NULL.  The mutex and condition variable are
 * not torn down on that path.
 *
 * Returns ctx->rc in every case.
 */
grn_rc
grn_com_event_init(grn_ctx *ctx, grn_com_event *ev, int max_nevents, int data_size)
{
  ev->max_nevents = max_nevents;
  ev->hash = grn_hash_create(ctx, NULL, sizeof(grn_sock), data_size, 0);
  if (!ev->hash) {
    return ctx->rc;
  }

  MUTEX_INIT(ev->mutex);
  COND_INIT(ev->cond);
  GRN_COM_QUEUE_INIT(&ev->recv_old);
  ev->msg_handler = NULL;
  memset(&(ev->curr_edge_id), 0, sizeof(grn_com_addr));
  ev->acceptor = NULL;
  ev->opaque = NULL;

#ifndef USE_SELECT
# ifdef USE_EPOLL
  ev->events = GRN_MALLOC(sizeof(struct epoll_event) * max_nevents);
  if (ev->events) {
    ev->epfd = epoll_create(max_nevents);
    if (ev->epfd != -1) {
      return ctx->rc;
    }
    SERR("epoll_create");
    GRN_FREE(ev->events);
  }
# else /* USE_EPOLL */
#  ifdef USE_KQUEUE
  ev->events = GRN_MALLOC(sizeof(struct kevent) * max_nevents);
  if (ev->events) {
    ev->kqfd = kqueue();
    if (ev->kqfd != -1) {
      return ctx->rc;
    }
    SERR("kqueue");
    GRN_FREE(ev->events);
  }
#  else /* USE_KQUEUE */
  ev->events = GRN_MALLOC(sizeof(struct pollfd) * max_nevents);
  if (ev->events) {
    return ctx->rc;
  }
#  endif /* USE_KQUEUE*/
# endif /* USE_EPOLL */
  /* Only reached when the backend could not be set up: undo the hash. */
  grn_hash_close(ctx, ev->hash);
  ev->hash = NULL;
  ev->events = NULL;
#endif /* USE_SELECT */
  return ctx->rc;
}
/*
 * Build a grn_query from the `str_len` bytes at `str`.
 *
 * One allocation holds the grn_query itself, a pool of max_exprs * 4
 * cells and a NUL-terminated private copy of the query text, which is
 * placed directly behind the cell pool.  After the fields are set up,
 * get_pragma() and get_expr() are run on the new object.
 *
 * Returns the new query, or NULL (after logging an alert) when the
 * allocation fails.
 */
grn_query *
grn_query_open(grn_ctx *ctx, const char *str, unsigned int str_len,
               grn_operator default_op, int max_exprs)
{
  grn_query *query;
  int max_cells = max_exprs * 4;

  query = GRN_MALLOC(sizeof(grn_query) + max_cells * sizeof(grn_cell) + str_len + 1);
  if (!query) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_query_open malloc fail");
    return NULL;
  }
  query->header.type = GRN_QUERY;

  /* The text copy starts right after the last pooled cell. */
  query->str = (char *)&query->cell_pool[max_cells];
  memcpy(query->str, str, str_len);
  query->str[str_len] = '\0';
  query->cur = query->str;
  query->str_end = query->str + str_len;

  query->default_op = default_op;
  query->encoding = ctx->encoding;
  query->max_exprs = max_exprs;
  query->max_cells = max_cells;
  query->cur_cell = 0;
  query->cur_expr = 0;
  query->escalation_threshold = GRN_DEFAULT_MATCH_ESCALATION_THRESHOLD;
  query->escalation_decaystep = DEFAULT_DECAYSTEP;
  query->weight_offset = 0;
  query->opt.weight_vector = NULL;
  query->weight_set = NULL;

  get_pragma(ctx, query);
  query->expr = get_expr(ctx, query);

  query->opt.vector_size = DEFAULT_WEIGHT_VECTOR_SIZE;
  /* The per-section weight callback is installed only if a weight set exists. */
  if (query->weight_set) {
    query->opt.func = section_weight_cb;
  } else {
    query->opt.func = NULL;
  }
  query->opt.func_arg = query->weight_set;
  query->snip_conds = NULL;
  return query;
}
/*
 * Register a new type named `name` (name_size bytes) in the database
 * attached to `ctx` and return the object describing it.
 *
 * flags: stored in the new object's header flags.
 * size:  stored as the type size (GRN_TYPE_SIZE).
 *
 * Returns the new object, or NULL when no context/database is available,
 * the name is rejected by grn_db_check_name(), the database object is not
 * a db, registration or allocation fails, or grn_db_obj_init() fails.
 */
grn_obj *
grn_type_create(grn_ctx *ctx, const char *name, unsigned int name_size,
                grn_obj_flags flags, unsigned int size)
{
  grn_id id;
  struct _grn_type *res = NULL;
  grn_obj *db;
  if (!ctx) {
    /*
     * ERR() takes no context argument and reports through `ctx`, so it
     * must not be used when there is no context at all.
     */
    return NULL;
  }
  if (!ctx->impl || !(db = ctx->impl->db)) {
    ERR(GRN_INVALID_ARGUMENT, "db not initialized");
    return NULL;
  }
  GRN_API_ENTER;
  if (grn_db_check_name(ctx, name, name_size)) {
    GRN_DB_CHECK_NAME_ERR("[type][create]", name, name_size);
    GRN_API_RETURN(NULL);
  }
  if (!GRN_DB_P(db)) {
    ERR(GRN_INVALID_ARGUMENT, "invalid db assigned");
    GRN_API_RETURN(NULL);
  }
  id = grn_obj_register(ctx, db, name, name_size);
  if (id && (res = GRN_MALLOC(sizeof(grn_db_obj)))) {
    GRN_DB_OBJ_SET_TYPE(res, GRN_TYPE);
    res->obj.header.flags = flags;
    res->obj.header.domain = GRN_ID_NIL;
    GRN_TYPE_SIZE(&res->obj) = size;
    if (grn_db_obj_init(ctx, db, id, DB_OBJ(res))) {
      /*
       * NOTE: the id registered above is not released on this path; the
       * grn_obj_delete(ctx, db, id) call that would do so is disabled.
       */
      GRN_FREE(res);
      GRN_API_RETURN(NULL);
    }
  }
  GRN_API_RETURN((grn_obj *)res);
}
/*
 * Set up an n-gram tokenizer for the string popped from the context stack.
 *
 * table:      its flags and encoding are read with grn_table_get_info();
 *             GRN_OBJ_KEY_NORMALIZE is forwarded to the string opener.
 * user_data:  on success, user_data->ptr receives the new tokenizer state.
 * ngram_unit: stored as the tokenizer's n-gram unit.
 *
 * Returns GRN_SUCCESS, GRN_INVALID_ARGUMENT when nothing can be popped,
 * ctx->rc when the tokenizer cannot be allocated, or GRN_TOKENIZER_ERROR
 * when the input string cannot be opened.  On every failure path nothing
 * is left allocated and user_data->ptr is not modified.
 */
static grn_rc
ngram_init(grn_ctx *ctx, grn_obj *table, grn_proc_data *user_data, uint8_t ngram_unit)
{
  grn_obj *str;
  int nflags = GRN_STR_REMOVEBLANK|GRN_STR_WITH_CTYPES;
  grn_ngram_tokenizer *token;
  grn_obj_flags table_flags;
  if (!(str = grn_ctx_pop(ctx))) { return GRN_INVALID_ARGUMENT; }
  if (!(token = GRN_MALLOC(sizeof(grn_ngram_tokenizer)))) { return ctx->rc; }
  token->uni_alpha = 1;
  token->uni_digit = 1;
  token->uni_symbol = 1;
  token->ngram_unit = ngram_unit;
  token->overlap = 0;
  token->pos = 0;
  token->skip = 0;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_str_open failed at grn_token_open");
    /* Release the half-built tokenizer instead of leaking it. */
    GRN_FREE(token);
    return GRN_TOKENIZER_ERROR;
  }
  token->next = (unsigned char *)token->nstr->norm;
  token->end = token->next + token->nstr->norm_blen;
  token->ctypes = token->nstr->ctypes;
  token->len = token->nstr->length;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  /* Publish the state only once it is completely initialized. */
  user_data->ptr = token;
  return GRN_SUCCESS;
}
/*
 * Set up a delimiter-based tokenizer for the string popped from the
 * context stack.
 *
 * table:         its flags and encoding are read with grn_table_get_info();
 *                GRN_OBJ_KEY_NORMALIZE is forwarded to the string opener.
 * user_data:     on success, user_data->ptr receives the tokenizer state.
 * delimiter,
 * delimiter_len: stored in the tokenizer state; the bytes are not copied.
 *
 * Always returns NULL.  Failures are reported with ERR() (missing
 * argument, string open failure) or by the failing allocation; on every
 * failure path the state is released and user_data->ptr is not modified.
 */
static grn_obj *
delimited_init(grn_ctx *ctx, grn_obj *table, grn_user_data *user_data,
               uint8_t *delimiter, uint32_t delimiter_len)
{
  grn_obj *str;
  int nflags = 0;
  grn_delimited_tokenizer *token;
  grn_obj_flags table_flags;
  if (!(str = grn_ctx_pop(ctx))) {
    ERR(GRN_INVALID_ARGUMENT, "missing argument");
    return NULL;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_delimited_tokenizer)))) { return NULL; }
  token->delimiter = delimiter;
  token->delimiter_len = delimiter_len;
  token->pos = 0;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_FREE(token);
    ERR(GRN_TOKENIZER_ERROR, "grn_str_open failed at grn_token_open");
    return NULL;
  }
  token->next = (unsigned char *)token->nstr->norm;
  token->end = token->next + token->nstr->norm_blen;
  token->len = token->nstr->length;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  /*
   * Publish the state last so that the caller never ends up holding a
   * pointer to memory that was freed on the failure path above.
   */
  user_data->ptr = token;
  return NULL;
}
// Opens the grn_dat stored at `path`.
//
// A non-NULL path whose length reaches PATH_MAX - (FILE_ID_LENGTH + 1) is
// rejected with GRN_FILENAME_TOO_LONG.  Otherwise a grn_dat is allocated,
// initialized with grn_dat_init(), its io object is opened and the cached
// fields (file id, encoding, flags, tokenizer) are loaded from the io
// header.
//
// Returns the new object, or NULL on any failure; everything acquired up
// to the failing step is released again.
grn_dat *
grn_dat_open(grn_ctx *ctx, const char *path)
{
  if (path) {
    if (std::strlen(path) >= (PATH_MAX - (FILE_ID_LENGTH + 1))) {
      ERR(GRN_FILENAME_TOO_LONG, "too long path");
      return NULL;
    }
  }

  grn_dat * const dat = static_cast<grn_dat *>(GRN_MALLOC(sizeof(grn_dat)));
  if (!dat) {
    return NULL;
  }
  grn_dat_init(ctx, dat);

  dat->io = grn_io_open(ctx, path, grn_io_auto);
  if (!dat->io) {
    GRN_FREE(dat);
    return NULL;
  }

  dat->header = static_cast<struct grn_dat_header *>(grn_io_header(dat->io));
  if (!dat->header) {
    grn_io_close(ctx, dat->io);
    GRN_FREE(dat);
    return NULL;
  }

  // Mirror the persistent header into the in-memory object.
  dat->file_id = dat->header->file_id;
  dat->encoding = dat->header->encoding;
  dat->obj.header.flags = dat->header->flags;
  dat->tokenizer = grn_ctx_at(ctx, dat->header->tokenizer);
  return dat;
}
/*
 * Set up the uvector tokenizer.
 *
 * Three objects are popped from the context stack, in this order: flags,
 * the string to tokenize, and mode.  Only the string is used afterwards;
 * the other two merely have to be present.  The tokenizer walks the
 * string in units of sizeof(grn_id) bytes.
 *
 * user_data->ptr receives the tokenizer state on success.  Always returns
 * NULL; failures are reported with ERR().
 */
static grn_obj *
uvector_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *str, *flags, *mode;
  grn_uvector_tokenizer *tokenizer;

  flags = grn_ctx_pop(ctx);
  if (!flags) {
    ERR(GRN_INVALID_ARGUMENT, "[tokenizer][uvector] missing argument: flags");
    return NULL;
  }
  str = grn_ctx_pop(ctx);
  if (!str) {
    ERR(GRN_INVALID_ARGUMENT, "[tokenizer][uvector] missing argument: string");
    return NULL;
  }
  mode = grn_ctx_pop(ctx);
  if (!mode) {
    ERR(GRN_INVALID_ARGUMENT, "[tokenizer][uvector] missing argument: mode");
    return NULL;
  }

  tokenizer = GRN_MALLOC(sizeof(grn_uvector_tokenizer));
  if (!tokenizer) {
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[tokenizer][uvector] "
        "memory allocation to grn_uvector_tokenizer failed");
    return NULL;
  }
  user_data->ptr = tokenizer;

  grn_tokenizer_token_init(ctx, &(tokenizer->token));
  /* The cursor references the popped string's bytes; nothing is copied. */
  tokenizer->curr = (byte *)GRN_TEXT_VALUE(str);
  tokenizer->tail = tokenizer->curr + GRN_TEXT_LEN(str);
  tokenizer->unit = sizeof(grn_id);
  return NULL;
}
/*
 * Create a scanner for `expr`.
 *
 * The expression is first passed to grn_expr_rewrite(); when that yields
 * nothing the original expression is used as is.  The scan information is
 * then built from the chosen expression.
 *
 * Returns the scanner, or NULL when the allocation fails or the scan
 * information cannot be built (the scanner is closed in that case).
 */
grn_scanner *
grn_scanner_open(grn_ctx *ctx, grn_obj *expr, grn_operator op,
                 grn_bool record_exist)
{
  grn_obj *rewritten;
  grn_scanner *scanner = GRN_MALLOC(sizeof(grn_scanner));

  if (!scanner) {
    return NULL;
  }
  scanner->source_expr = expr;

  rewritten = grn_expr_rewrite(ctx, expr);
  scanner->expr = rewritten ? rewritten : expr;

  scanner->sis = grn_scan_info_build(ctx, scanner->expr, &(scanner->n_sis),
                                     op, record_exist);
  if (scanner->sis) {
    return scanner;
  }
  grn_scanner_close(ctx, scanner);
  return NULL;
}
/*
 * Checks that the allocator behind GRN_MALLOC can be swapped at run time
 * with grn_ctx_set_malloc().  The body is compiled only when
 * USE_DYNAMIC_MALLOC_CHANGE is defined; otherwise the test is empty.
 */
void
test_dynamic_malloc_change(void)
{
#ifdef USE_DYNAMIC_MALLOC_CHANGE
  cut_assert_ensue_context();
  {
    /*
     * NOTE(review): GRN_MALLOC/GRN_FREE take no context argument, so they
     * presumably pick up this local `ctx` -- confirm against the macros.
     */
    grn_ctx *ctx = context;
    /* Before the swap, a 1-byte allocation is expected to succeed. */
    memory = GRN_MALLOC(1);
    cut_assert_not_null(memory);
    GRN_FREE(memory);
    /* After installing malloc_always_fail the same request must give NULL. */
    grn_ctx_set_malloc(ctx, malloc_always_fail);
    memory = GRN_MALLOC(1);
    cut_assert_null(memory);
  }
#endif
}
/*
 * Allocate and initialize ctx->impl.
 *
 * Three resources are acquired in order: the impl structure itself, the
 * anonymous mapping for the segment table, and the `values` array.  If the
 * second or third step fails, everything acquired so far is released and
 * ctx->impl is reset to NULL.  The remaining fields are then given their
 * initial values.  The results of creating `qe` and `outbuf` are stored
 * without being checked.
 */
static void
grn_ctx_impl_init(grn_ctx *ctx)
{
  struct _grn_ctx_impl *impl;

  /* ctx->impl is assigned right away, before any further call takes ctx. */
  ctx->impl = GRN_MALLOC(sizeof(struct _grn_ctx_impl));
  if (!ctx->impl) {
    return;
  }
  impl = ctx->impl;

  impl->segs = grn_io_anon_map(ctx, &impl->mi,
                               sizeof(grn_io_mapinfo) * N_SEGMENTS);
  if (!impl->segs) {
    GRN_FREE(ctx->impl);
    ctx->impl = NULL;
    return;
  }
#ifdef USE_DYNAMIC_MALLOC_CHANGE
  grn_ctx_impl_init_malloc(ctx);
#endif
  impl->values = grn_array_create(ctx, NULL, sizeof(grn_tmp_db_obj),
                                  GRN_ARRAY_TINY);
  if (!impl->values) {
    grn_io_anon_unmap(ctx, &impl->mi, sizeof(grn_io_mapinfo) * N_SEGMENTS);
    GRN_FREE(ctx->impl);
    ctx->impl = NULL;
    return;
  }

  impl->encoding = ctx->encoding;
  impl->lifoseg = -1;
  impl->currseg = -1;
  impl->db = NULL;
  impl->qe = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(void *), 0);
  impl->stack_curr = 0;

  impl->phs = NIL;
  impl->code = NIL;
  impl->dump = NIL;
  impl->op = GRN_OP_T0LVL;
  impl->args = NIL;
  impl->envir = NIL;
  impl->value = NIL;

  impl->ncells = 0;
  impl->n_entries = 0;
  impl->seqno = 0;
  impl->lseqno = 0;
  impl->nbinds = 0;
  impl->nunbinds = 0;

  impl->feed_mode = grn_ql_atonce;
  impl->cur = NULL;
  impl->str_end = NULL;
  impl->batchmode = 0;
  impl->gc_verbose = 0;
  impl->inbuf = NULL;
  impl->co.mode = 0;
  impl->co.func = NULL;
  impl->objects = NULL;
  impl->symbols = NULL;
  impl->com = NULL;

  impl->outbuf = grn_obj_open(ctx, GRN_BULK, 0, 0);
  GRN_TEXT_INIT(&impl->subbuf, 0);
}
/*
 * Return a newly allocated copy of the first `string_len` bytes of
 * `string`, followed by a NUL byte, or NULL when the allocation fails.
 * The copy is obtained with GRN_MALLOC.
 */
inline static char *
grn_snip_strndup(grn_ctx *ctx, const char *string, unsigned int string_len)
{
  char *duplicate = GRN_MALLOC(string_len + 1);

  if (duplicate) {
    memcpy(duplicate, string, string_len);
    /* The terminator is not required, but is kept for ql use. */
    duplicate[string_len] = '\0';
  }
  return duplicate;
}
/*
 * Set up the uvector tokenizer for the string popped from the context
 * stack.  The tokenizer walks the string's bytes in units of
 * sizeof(grn_id); the bytes are referenced, not copied.
 *
 * user_data->ptr receives the tokenizer state.
 *
 * Returns GRN_SUCCESS, GRN_INVALID_ARGUMENT when nothing can be popped,
 * or ctx->rc when the state cannot be allocated.
 */
static grn_rc
uvector_init(grn_ctx *ctx, grn_obj *table, grn_proc_data *user_data)
{
  grn_obj *str;
  grn_uvector_tokenizer_info *token;

  str = grn_ctx_pop(ctx);
  if (!str) {
    return GRN_INVALID_ARGUMENT;
  }
  token = GRN_MALLOC(sizeof(grn_uvector_tokenizer_info));
  if (!token) {
    return ctx->rc;
  }
  user_data->ptr = token;

  token->curr = GRN_TEXT_VALUE(str);
  token->tail = token->curr + GRN_TEXT_LEN(str);
  token->unit = sizeof(grn_id);
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  return GRN_SUCCESS;
}
/*
 * Set up an n-gram tokenizer.
 *
 * The tokenizer query is opened from (nargs, args) with blank removal,
 * character types and tokenized-delimiter removal requested.  The
 * uni_alpha / uni_digit / uni_symbol / ignore_blank switches and
 * ngram_unit are copied into the tokenizer state unchanged.
 *
 * user_data->ptr receives the tokenizer state on success.  Always returns
 * NULL; an allocation failure is reported with ERR() after the query has
 * been closed.
 */
static grn_obj *
ngram_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data,
           uint8_t ngram_unit, uint8_t uni_alpha, uint8_t uni_digit,
           uint8_t uni_symbol, uint8_t ignore_blank)
{
  unsigned int normalize_flags =
    GRN_STRING_REMOVE_BLANK |
    GRN_STRING_WITH_TYPES |
    GRN_STRING_REMOVE_TOKENIZED_DELIMITER;
  grn_tokenizer_query *query;
  grn_ngram_tokenizer *tokenizer;
  const char *normalized;
  unsigned int normalized_length_in_bytes;

  query = grn_tokenizer_query_open(ctx, nargs, args, normalize_flags);
  if (!query) {
    return NULL;
  }

  tokenizer = GRN_MALLOC(sizeof(grn_ngram_tokenizer));
  if (!tokenizer) {
    grn_tokenizer_query_close(ctx, query);
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[tokenizer][ngram] "
        "memory allocation to grn_ngram_tokenizer failed");
    return NULL;
  }
  user_data->ptr = tokenizer;

  grn_tokenizer_token_init(ctx, &(tokenizer->token));
  tokenizer->query = query;

  tokenizer->uni_alpha = uni_alpha;
  tokenizer->uni_digit = uni_digit;
  tokenizer->uni_symbol = uni_symbol;
  tokenizer->ngram_unit = ngram_unit;
  tokenizer->ignore_blank = ignore_blank;
  tokenizer->overlap = 0;
  tokenizer->pos = 0;
  tokenizer->skip = 0;

  /* Scan range: the normalized form of the query string. */
  grn_string_get_normalized(ctx, query->normalized_query,
                            &normalized, &normalized_length_in_bytes,
                            &(tokenizer->len));
  tokenizer->next = (const unsigned char *)normalized;
  tokenizer->end = tokenizer->next + normalized_length_in_bytes;
  tokenizer->ctypes = grn_string_get_types(ctx, query->normalized_query);
  return NULL;
}
/*
 * Set up the regexp tokenizer.
 *
 * The tokenizer query is opened from (nargs, args) with character types
 * requested.  The state starts at the beginning of the normalized query
 * with all position flags in their initial setting and an empty text
 * buffer.
 *
 * user_data->ptr receives the tokenizer state on success.  Always returns
 * NULL; an allocation failure is reported with ERR() after the query has
 * been closed.
 */
static grn_obj *
regexp_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  unsigned int normalize_flags = GRN_STRING_WITH_TYPES;
  grn_tokenizer_query *query;
  grn_regexp_tokenizer *tokenizer;
  const char *normalized;
  unsigned int normalized_length_in_bytes;

  query = grn_tokenizer_query_open(ctx, nargs, args, normalize_flags);
  if (!query) {
    return NULL;
  }

  tokenizer = GRN_MALLOC(sizeof(grn_regexp_tokenizer));
  if (!tokenizer) {
    grn_tokenizer_query_close(ctx, query);
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[tokenizer][regexp] failed to allocate memory");
    return NULL;
  }
  user_data->ptr = tokenizer;

  grn_tokenizer_token_init(ctx, &(tokenizer->token));
  tokenizer->query = query;

  tokenizer->get.n_skip_tokens = 0;

  tokenizer->is_begin = GRN_TRUE;
  tokenizer->is_end = GRN_FALSE;
  tokenizer->is_start_token = GRN_TRUE;
  tokenizer->is_overlapping = GRN_FALSE;

  /* Scan range: the normalized form of the query string. */
  grn_string_get_normalized(ctx, query->normalized_query,
                            &normalized, &normalized_length_in_bytes,
                            NULL);
  tokenizer->next = normalized;
  tokenizer->end = tokenizer->next + normalized_length_in_bytes;
  tokenizer->nth_char = 0;
  tokenizer->char_types = grn_string_get_types(ctx, query->normalized_query);

  GRN_TEXT_INIT(&(tokenizer->buffer), 0);
  return NULL;
}
// Creates a new grn_dat, backed by the file at `path` when one is given.
//
// An empty path is treated like NULL.  A path whose length reaches
// PATH_MAX - (FILE_ID_LENGTH + 1) is rejected with GRN_FILENAME_TOO_LONG.
// The two unnamed uint32_t parameters are ignored.  `flags` is stored in
// both the in-memory object and the persistent header.
//
// The header's encoding is ctx->encoding unless that is GRN_ENC_DEFAULT,
// in which case grn_gctx.encoding is used.
//
// Returns the new object, or NULL on failure.
grn_dat *
grn_dat_create(grn_ctx *ctx, const char *path, uint32_t, uint32_t,
               uint32_t flags)
{
  if (path && path[0] == '\0') {
    path = NULL;
  }
  if (path && std::strlen(path) >= (PATH_MAX - (FILE_ID_LENGTH + 1))) {
    ERR(GRN_FILENAME_TOO_LONG, "too long path");
    return NULL;
  }

  grn_dat * const dat = static_cast<grn_dat *>(GRN_MALLOC(sizeof(grn_dat)));
  if (!dat) {
    return NULL;
  }
  grn_dat_init(ctx, dat);
  dat->obj.header.flags = flags;

  dat->io = grn_io_create(ctx, path, sizeof(struct grn_dat_header),
                          4096, 0, grn_io_auto, GRN_IO_EXPIRE_SEGMENT);
  if (!dat->io) {
    GRN_FREE(dat);
    return NULL;
  }
  grn_io_set_type(dat->io, GRN_TABLE_DAT_KEY);

  dat->header = static_cast<struct grn_dat_header *>(grn_io_header(dat->io));
  if (!dat->header) {
    grn_io_close(ctx, dat->io);
    grn_dat_remove_file(ctx, path);
    GRN_FREE(dat);
    return NULL;
  }

  grn_encoding encoding = grn_gctx.encoding;
  if (ctx->encoding != GRN_ENC_DEFAULT) {
    encoding = ctx->encoding;
  }

  dat->header->flags = flags;
  dat->header->encoding = encoding;
  dat->header->tokenizer = GRN_ID_NIL;
  dat->header->file_id = 0;
  dat->encoding = encoding;
  dat->tokenizer = NULL;
  return dat;
}
/*
 * Set up the uvector tokenizer for the string popped from the context
 * stack.  The tokenizer walks the string's bytes in units of
 * sizeof(grn_id); the bytes are referenced, not copied.
 *
 * user_data->ptr receives the tokenizer state on success.  Always returns
 * NULL; a missing argument is reported with ERR().
 */
static grn_obj *
uvector_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *str;
  grn_uvector_tokenizer_info *token;

  str = grn_ctx_pop(ctx);
  if (!str) {
    ERR(GRN_INVALID_ARGUMENT, "missing argument");
    return NULL;
  }
  token = GRN_MALLOC(sizeof(grn_uvector_tokenizer_info));
  if (!token) {
    return NULL;
  }
  user_data->ptr = token;

  token->curr = GRN_TEXT_VALUE(str);
  token->tail = token->curr + GRN_TEXT_LEN(str);
  token->unit = sizeof(grn_id);
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  return NULL;
}
/*
 * Set up a delimiter-based tokenizer.
 *
 * The tokenizer query is opened from (nargs, args) without any
 * normalization flags.  `delimiter` / `delimiter_len` are stored in the
 * state; the bytes are not copied.  Whether the raw query contains a
 * tokenized delimiter is determined once here and cached in the state.
 *
 * user_data->ptr receives the tokenizer state on success.  Always returns
 * NULL; an allocation failure is reported with ERR() and the query is
 * closed.
 */
static grn_obj *
delimited_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data,
               const uint8_t *delimiter, uint32_t delimiter_len)
{
  unsigned int normalize_flags = 0;
  grn_tokenizer_query *query;
  grn_delimited_tokenizer *tokenizer;
  const char *normalized;
  unsigned int normalized_length_in_bytes;

  query = grn_tokenizer_query_open(ctx, nargs, args, normalize_flags);
  if (!query) {
    return NULL;
  }

  tokenizer = GRN_MALLOC(sizeof(grn_delimited_tokenizer));
  if (!tokenizer) {
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[tokenizer][delimit] "
        "memory allocation to grn_delimited_tokenizer failed");
    grn_tokenizer_query_close(ctx, query);
    return NULL;
  }
  user_data->ptr = tokenizer;

  tokenizer->query = query;
  tokenizer->have_tokenized_delimiter =
    grn_tokenizer_have_tokenized_delimiter(ctx,
                                           query->ptr,
                                           query->length,
                                           query->encoding);
  tokenizer->delimiter = delimiter;
  tokenizer->delimiter_len = delimiter_len;

  /* Scan range: the normalized form of the query string. */
  grn_string_get_normalized(ctx, query->normalized_query,
                            &normalized, &normalized_length_in_bytes,
                            NULL);
  tokenizer->next = (const unsigned char *)normalized;
  tokenizer->end = tokenizer->next + normalized_length_in_bytes;

  grn_tokenizer_token_init(ctx, &(tokenizer->token));
  return NULL;
}
/*
 * Start tokenizing the `str_len` bytes at `str` for `table`.
 *
 * The table's encoding and tokenizer are looked up first.  When the table
 * has a tokenizer, its PROC_INIT function is invoked with the string
 * pushed on the context stack as a shallow (non-owning) text object.
 *
 * Returns the new token, or NULL when the table information cannot be
 * read, the allocation fails, or ctx->rc is set once initialization has
 * finished (the token is freed in that case).
 */
grn_token *
grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len,
               grn_token_mode mode)
{
  grn_token *token;
  grn_encoding encoding;
  grn_obj *tokenizer;

  if (grn_table_get_info(ctx, table, NULL, &encoding, &tokenizer)) {
    return NULL;
  }
  token = GRN_MALLOC(sizeof(grn_token));
  if (!token) {
    return NULL;
  }

  token->table = table;
  token->mode = mode;
  token->encoding = encoding;
  token->tokenizer = tokenizer;
  token->orig = str;
  token->orig_blen = str_len;
  token->curr = NULL;
  token->curr_size = 0;
  token->pos = -1;
  token->status = grn_token_doing;
  token->force_prefix = 0;

  if (tokenizer) {
    grn_obj str_;
    grn_proc *proc = (grn_proc *)tokenizer;

    GRN_TEXT_INIT(&str_, GRN_OBJ_DO_SHALLOW_COPY);
    GRN_TEXT_SET_REF(&str_, str, str_len);

    token->pctx.caller = NULL;
    token->pctx.user_data.ptr = NULL;
    token->pctx.proc = proc;
    token->pctx.hooks = NULL;
    token->pctx.currh = NULL;
    token->pctx.phase = PROC_INIT;

    /* The init function pops its input from the context stack. */
    grn_ctx_push(ctx, &str_);
    proc->funcs[PROC_INIT](ctx, 1, &table, &token->pctx.user_data);
    grn_obj_close(ctx, &str_);
  }

  if (ctx->rc) {
    GRN_FREE(token);
    return NULL;
  }
  return token;
}
/*
 * Set up an n-gram tokenizer for the string popped from the context stack.
 *
 * table:      its flags and encoding are read with grn_table_get_info();
 *             GRN_OBJ_KEY_NORMALIZE is forwarded to the string opener.
 * user_data:  on success, user_data->ptr receives the tokenizer state.
 * ngram_unit, uni_alpha, uni_digit, uni_symbol, ignore_blank:
 *             copied into the tokenizer state unchanged.
 *
 * Always returns NULL.  Failures are reported with ERR() (missing
 * argument, string open failure) or by the failing allocation; on every
 * failure path the state is released and user_data->ptr is not modified.
 */
static grn_obj *
ngram_init(grn_ctx *ctx, grn_obj *table, grn_user_data *user_data,
           uint8_t ngram_unit, uint8_t uni_alpha, uint8_t uni_digit,
           uint8_t uni_symbol, uint8_t ignore_blank)
{
  grn_obj *str;
  int nflags = GRN_STR_REMOVEBLANK|GRN_STR_WITH_CTYPES;
  grn_ngram_tokenizer *token;
  grn_obj_flags table_flags;
  if (!(str = grn_ctx_pop(ctx))) {
    ERR(GRN_INVALID_ARGUMENT, "missing argument");
    return NULL;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_ngram_tokenizer)))) { return NULL; }
  token->uni_alpha = uni_alpha;
  token->uni_digit = uni_digit;
  token->uni_symbol = uni_symbol;
  token->ngram_unit = ngram_unit;
  token->ignore_blank = ignore_blank;
  token->overlap = 0;
  token->pos = 0;
  token->skip = 0;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_FREE(token);
    ERR(GRN_TOKENIZER_ERROR, "grn_str_open failed at grn_token_open");
    return NULL;
  }
  token->next = (unsigned char *)token->nstr->norm;
  token->end = token->next + token->nstr->norm_blen;
  token->ctypes = token->nstr->ctypes;
  token->len = token->nstr->length;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  /*
   * Publish the state last so that the caller never ends up holding a
   * pointer to memory that was freed on the failure path above.
   */
  user_data->ptr = token;
  return NULL;
}
/*
 * Create the input object for `command`.
 *
 * The new object remembers the command and the variables obtained from
 * grn_expr_get_vars() as its arguments.
 *
 * Returns the input, or NULL (after reporting GRN_NO_MEMORY_AVAILABLE)
 * when the allocation fails.
 */
grn_command_input *
grn_command_input_open(grn_ctx *ctx, grn_obj *command)
{
  grn_command_input *input = NULL;
  uint32_t n_arguments;

  GRN_API_ENTER;

  input = GRN_MALLOC(sizeof(grn_command_input));
  if (!input) {
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[command-input] failed to allocate grn_command_input");
    GRN_API_RETURN(input);
  }

  input->command = command;
  /* TODO: Allocate by self. */
  input->arguments = grn_expr_get_vars(ctx, input->command, &n_arguments);

  GRN_API_RETURN(input);
}
inline static grn_obj * eucjp_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) { static uint16_t hankana[] = { 0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa5f2, 0xa5a1, 0xa5a3, 0xa5a5, 0xa5a7, 0xa5a9, 0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3, 0xa1bc, 0xa5a2, 0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa, 0xa5ab, 0xa5ad, 0xa5af, 0xa5b1, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd, 0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6, 0xa5c8, 0xa5ca, 0xa5cb, 0xa5cc, 0xa5cd, 0xa5ce, 0xa5cf, 0xa5d2, 0xa5d5, 0xa5d8, 0xa5db, 0xa5de, 0xa5df, 0xa5e0, 0xa5e1, 0xa5e2, 0xa5e4, 0xa5e6, 0xa5e8, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5ef, 0xa5f3, 0xa1ab, 0xa1eb }; static unsigned char dakuten[] = { 0xf4, 0, 0, 0, 0, 0xac, 0, 0xae, 0, 0xb0, 0, 0xb2, 0, 0xb4, 0, 0xb6, 0, 0xb8, 0, 0xba, 0, 0xbc, 0, 0xbe, 0, 0xc0, 0, 0xc2, 0, 0, 0xc5, 0, 0xc7, 0, 0xc9, 0, 0, 0, 0, 0, 0, 0xd0, 0, 0, 0xd3, 0, 0, 0xd6, 0, 0, 0xd9, 0, 0, 0xdc }; static unsigned char handaku[] = { 0xd1, 0, 0, 0xd4, 0, 0, 0xd7, 0, 0, 0xda, 0, 0, 0xdd }; grn_string *nstr = (grn_string *)args[0]; int16_t *ch; const unsigned char *s, *s_, *e; unsigned char *d, *d0, *d_, b; uint_least8_t *cp, *ctypes, ctype; size_t size = nstr->original_length_in_bytes, length = 0; int removeblankp = nstr->flags & GRN_STRING_REMOVE_BLANK; if (!(nstr->normalized = GRN_MALLOC(size * 2 + 1))) { ERR(GRN_NO_MEMORY_AVAILABLE, "[strinig][eucjp] failed to allocate normalized text space"); return NULL; } d0 = (unsigned char *) nstr->normalized; if (nstr->flags & GRN_STRING_WITH_CHECKS) { if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) { GRN_FREE(nstr->normalized); nstr->normalized = NULL; ERR(GRN_NO_MEMORY_AVAILABLE, "[strinig][eucjp] failed to allocate checks space"); return NULL; } } ch = nstr->checks; if (nstr->flags & GRN_STRING_WITH_TYPES) { if (!(nstr->ctypes = GRN_MALLOC(size + 1))) { GRN_FREE(nstr->checks); GRN_FREE(nstr->normalized); nstr->checks = NULL; nstr->normalized = NULL; ERR(GRN_NO_MEMORY_AVAILABLE, "[strinig][eucjp] 
failed to allocate character types space"); return NULL; } } cp = ctypes = nstr->ctypes; e = (unsigned char *)nstr->original + size; for (s = s_ = (unsigned char *) nstr->original, d = d_ = d0; s < e; s++) { if ((*s & 0x80)) { if (((s + 1) < e) && (*(s + 1) & 0x80)) { unsigned char c1 = *s++, c2 = *s, c3 = 0; switch (c1 >> 4) { case 0x08 : if (c1 == 0x8e && 0xa0 <= c2 && c2 <= 0xdf) { uint16_t c = hankana[c2 - 0xa0]; switch (c) { case 0xa1ab : if (d > d0 + 1 && d[-2] == 0xa5 && 0xa6 <= d[-1] && d[-1] <= 0xdb && (b = dakuten[d[-1] - 0xa6])) { *(d - 1) = b; if (ch) { ch[-1] += 2; s_ += 2; } continue; } else { *d++ = c >> 8; *d = c & 0xff; } break; case 0xa1eb : if (d > d0 + 1 && d[-2] == 0xa5 && 0xcf <= d[-1] && d[-1] <= 0xdb && (b = handaku[d[-1] - 0xcf])) { *(d - 1) = b; if (ch) { ch[-1] += 2; s_ += 2; } continue; } else { *d++ = c >> 8; *d = c & 0xff; } break; default : *d++ = c >> 8; *d = c & 0xff; break; } ctype = grn_char_katakana; } else { *d++ = c1; *d = c2; ctype = grn_char_others; } break; case 0x09 : *d++ = c1; *d = c2; ctype = grn_char_others; break; case 0x0a : switch (c1 & 0x0f) { case 1 : switch (c2) { case 0xbc : *d++ = c1; *d = c2; ctype = grn_char_katakana; break; case 0xb9 : *d++ = c1; *d = c2; ctype = grn_char_kanji; break; case 0xa1 : if (removeblankp) { if (cp > ctypes) { *(cp - 1) |= GRN_CHAR_BLANK; } continue; } else { *d = ' '; ctype = GRN_CHAR_BLANK|grn_char_symbol; } break; default : if (c2 >= 0xa4 && (c3 = symbol[c2 - 0xa4])) { *d = c3; ctype = grn_char_symbol; } else { *d++ = c1; *d = c2; ctype = grn_char_others; } break; } break; case 2 : *d++ = c1; *d = c2; ctype = grn_char_symbol; break; case 3 : c3 = c2 - 0x80; if ('a' <= c3 && c3 <= 'z') { ctype = grn_char_alpha; *d = c3; } else if ('A' <= c3 && c3 <= 'Z') { ctype = grn_char_alpha; *d = c3 + 0x20; } else if ('0' <= c3 && c3 <= '9') { ctype = grn_char_digit; *d = c3; } else { ctype = grn_char_others; *d++ = c1; *d = c2; } break; case 4 : *d++ = c1; *d = c2; ctype = grn_char_hiragana; 
break; case 5 : *d++ = c1; *d = c2; ctype = grn_char_katakana; break; case 6 : case 7 : case 8 : *d++ = c1; *d = c2; ctype = grn_char_symbol; break; default : *d++ = c1; *d = c2; ctype = grn_char_others; break; } break; default : *d++ = c1; *d = c2; ctype = grn_char_kanji; break; } } else {
// Opens a cursor over `dat`.
//
// The kind of cursor is selected from `flags`:
//   - GRN_CURSOR_BY_ID:  id-range cursor over [min, max];
//   - GRN_CURSOR_PREFIX: with a non-empty `max`, a descending prefix cursor
//     (variable-size keys only); otherwise, with a non-empty `min`, a
//     predictive cursor (unless GRN_CURSOR_RK is set);
//   - otherwise:         key-range cursor over [min, max].
// GRN_CURSOR_DESCENDING, GRN_CURSOR_GT and GRN_CURSOR_LT are translated to
// the matching grn::dat cursor options where the branch uses them.
//
// When `dat` has no trie, an initialized cursor without an underlying
// grn::dat cursor is returned.
//
// Returns NULL when the trie cannot be opened, the allocation fails, the
// factory throws, or the flag combination selects no cursor
// ("unsupported query").
grn_dat_cursor *
grn_dat_cursor_open(grn_ctx *ctx, grn_dat *dat,
                    const void *min, unsigned int min_size,
                    const void *max, unsigned int max_size,
                    int offset, int limit, int flags)
{
  if (!grn_dat_open_trie_if_needed(ctx, dat)) {
    return NULL;
  }
  grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie);

  grn_dat_cursor * const dc =
      static_cast<grn_dat_cursor *>(GRN_MALLOC(sizeof(grn_dat_cursor)));
  if (!dc) {
    return NULL;
  }
  grn_dat_cursor_init(ctx, dc);
  if (!trie) {
    // Nothing to iterate over: hand back the freshly initialized cursor.
    return dc;
  }

  const bool by_id = (flags & GRN_CURSOR_BY_ID) != 0;
  const bool by_prefix = (flags & GRN_CURSOR_PREFIX) != 0;
  const bool descending = (flags & GRN_CURSOR_DESCENDING) != 0;
  const bool except_lower = (flags & GRN_CURSOR_GT) != 0;
  const bool except_upper = (flags & GRN_CURSOR_LT) != 0;

  try {
    if (by_id) {
      dc->cursor = grn::dat::CursorFactory::open(*trie,
          min, min_size, max, max_size, offset, limit,
          grn::dat::ID_RANGE_CURSOR |
          (descending ? grn::dat::DESCENDING_CURSOR : 0) |
          (except_lower ? grn::dat::EXCEPT_LOWER_BOUND : 0) |
          (except_upper ? grn::dat::EXCEPT_UPPER_BOUND : 0));
    } else if (by_prefix) {
      if (max && max_size) {
        if ((dat->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) != 0) {
          dc->cursor = grn::dat::CursorFactory::open(*trie,
              NULL, min_size, max, max_size, offset, limit,
              grn::dat::PREFIX_CURSOR | grn::dat::DESCENDING_CURSOR);
        }
        // TODO: near (fixed-size keys)
      } else if (min && min_size) {
        if ((flags & GRN_CURSOR_RK) == 0) {
          dc->cursor = grn::dat::CursorFactory::open(*trie,
              min, min_size, NULL, 0, offset, limit,
              grn::dat::PREDICTIVE_CURSOR |
              (descending ? grn::dat::DESCENDING_CURSOR : 0) |
              (except_lower ? grn::dat::EXCEPT_EXACT_MATCH : 0));
        }
        // TODO: rk search
      }
    } else {
      dc->cursor = grn::dat::CursorFactory::open(*trie,
          min, min_size, max, max_size, offset, limit,
          grn::dat::KEY_RANGE_CURSOR |
          (descending ? grn::dat::DESCENDING_CURSOR : 0) |
          (except_lower ? grn::dat::EXCEPT_LOWER_BOUND : 0) |
          (except_upper ? grn::dat::EXCEPT_UPPER_BOUND : 0));
    }
  } catch (const grn::dat::Exception &ex) {
    ERR(grn_dat_translate_error_code(ex.code()),
        "grn::dat::CursorFactory::open failed");
    GRN_FREE(dc);
    return NULL;
  }

  if (!dc->cursor) {
    ERR(GRN_INVALID_ARGUMENT, "unsupported query");
    GRN_FREE(dc);
    return NULL;
  }
  dc->dat = dat;
  return dc;
}
/*
 * Start tokenizing the `str_len` bytes at `str` for `table`.
 *
 * With a tokenizer on the table, its PROC_INIT function is invoked after
 * the string (as a shallow text object) and then `flags` (as a UINT32)
 * have been pushed on the context stack.  Without a tokenizer, the string
 * is opened with the table's normalizer and the whole normalized string
 * becomes the current token.
 *
 * Returns the new token, or NULL when the table information cannot be
 * read, the allocation fails, or ctx->rc is set once initialization has
 * finished (the token is closed in that case).
 */
grn_token *
grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len,
               grn_token_mode mode, unsigned int flags)
{
  grn_token *token;
  grn_encoding encoding;
  grn_obj *tokenizer;
  grn_obj *normalizer;
  grn_obj_flags table_flags;

  if (grn_table_get_info(ctx, table, &table_flags, &encoding, &tokenizer,
                         &normalizer)) {
    return NULL;
  }
  token = GRN_MALLOC(sizeof(grn_token));
  if (!token) {
    return NULL;
  }

  token->table = table;
  token->mode = mode;
  token->encoding = encoding;
  token->tokenizer = tokenizer;
  token->orig = str;
  token->orig_blen = str_len;
  token->curr = NULL;
  token->nstr = NULL;
  token->curr_size = 0;
  token->pos = -1;
  token->status = GRN_TOKEN_DOING;
  token->force_prefix = 0;

  if (!tokenizer) {
    /* No tokenizer: the normalized input is the one and only token. */
    token->nstr = grn_string_open_(ctx, str, str_len, normalizer, 0,
                                   token->encoding);
    if (!token->nstr) {
      ERR(GRN_TOKENIZER_ERROR, "grn_string_open failed at grn_token_open");
    } else {
      const char *normalized;
      grn_string_get_normalized(ctx, token->nstr,
                                &normalized, &(token->curr_size), NULL);
      token->curr = (const unsigned char *)normalized;
    }
  } else {
    grn_obj str_, flags_;
    grn_proc *proc = (grn_proc *)tokenizer;

    GRN_TEXT_INIT(&str_, GRN_OBJ_DO_SHALLOW_COPY);
    GRN_TEXT_SET_REF(&str_, str, str_len);
    GRN_UINT32_INIT(&flags_, 0);
    GRN_UINT32_SET(ctx, &flags_, flags);

    token->pctx.caller = NULL;
    token->pctx.user_data.ptr = NULL;
    token->pctx.proc = proc;
    token->pctx.hooks = NULL;
    token->pctx.currh = NULL;
    token->pctx.phase = PROC_INIT;

    /* Push order: string first, flags on top of it. */
    grn_ctx_push(ctx, &str_);
    grn_ctx_push(ctx, &flags_);
    proc->funcs[PROC_INIT](ctx, 1, &table, &token->pctx.user_data);
    grn_obj_close(ctx, &flags_);
    grn_obj_close(ctx, &str_);
  }

  if (ctx->rc) {
    grn_token_close(ctx, token);
    return NULL;
  }
  return token;
}
/*
 * Set up the MeCab tokenizer for the string popped from the context stack.
 *
 * The string is opened (normalized when the table has
 * GRN_OBJ_KEY_NORMALIZE set) and handed to mecab_sparse_tostr3() under
 * sole_mecab_lock.  The output buffer starts at twice the input length
 * plus one and is doubled, up to ten attempts, for as long as MeCab
 * reports "output buffer overflow".  Trailing newlines and whitespace are
 * stripped from the result.
 *
 * user_data->ptr receives the tokenizer state on success only.
 *
 * Returns GRN_SUCCESS; GRN_INVALID_ARGUMENT when nothing can be popped;
 * GRN_TOKENIZER_ERROR when MeCab is unavailable, the string cannot be
 * opened, or MeCab fails; ctx->rc when an allocation fails.  On every
 * failure path the tokenizer state and the opened string are released.
 */
static grn_rc
mecab_init(grn_ctx *ctx, grn_obj *table, grn_proc_data *user_data)
{
  grn_obj *str;
  int nflags = 0;
  char *buf, *s, *p;
  char mecab_err[256];
  grn_obj_flags table_flags;
  grn_mecab_tokenizer *token;
  unsigned int bufsize, maxtrial = 10, len;
  if (!(str = grn_ctx_pop(ctx))) { return GRN_INVALID_ARGUMENT; }
  SOLE_MECAB_CONFIRM;
  if (!sole_mecab) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "mecab_new failed on grn_mecab_init");
    return GRN_TOKENIZER_ERROR;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_mecab_tokenizer)))) { return ctx->rc; }
  token->mecab = sole_mecab;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_str_open failed at grn_token_open");
    /* Release the half-built tokenizer instead of leaking it. */
    GRN_FREE(token);
    return GRN_TOKENIZER_ERROR;
  }
  len = token->nstr->norm_blen;
  /* strncpy() below fills at most size - 1 bytes, so this keeps it terminated. */
  mecab_err[sizeof(mecab_err) - 1] = '\0';
  for (bufsize = len * 2 + 1; maxtrial; bufsize *= 2, maxtrial--) {
    if(!(buf = GRN_MALLOC(bufsize + 1))) {
      GRN_LOG(ctx, GRN_LOG_ALERT, "buffer allocation on mecab_init failed !");
      grn_str_close(ctx, token->nstr);
      GRN_FREE(token);
      return ctx->rc;
    }
    MUTEX_LOCK(sole_mecab_lock);
    s = mecab_sparse_tostr3(token->mecab, token->nstr->norm, len, buf, bufsize);
    if (!s) {
      /* Copy the message while the lock is still held. */
      strncpy(mecab_err, mecab_strerror(token->mecab), sizeof(mecab_err) - 1);
    }
    MUTEX_UNLOCK(sole_mecab_lock);
    if (s) { break; }
    GRN_FREE(buf);
    /* Only a too-small output buffer is worth another, larger attempt. */
    if (strstr(mecab_err, "output buffer overflow") == NULL) { break; }
  }
  if (!s) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "mecab_sparse_tostr failed len=%d bufsize=%d err=%s",
            len, bufsize, mecab_err);
    grn_str_close(ctx, token->nstr);
    GRN_FREE(token);
    return GRN_TOKENIZER_ERROR;
  }
  /* Certain versions of mecab return a trailing lf or spaces. */
  for (p = buf + strlen(buf) - 1;
       buf <= p && (*p == '\n' || isspace(*(unsigned char *)p));
       p--) { *p = '\0'; }
  token->buf = (unsigned char *)buf;
  token->next = (unsigned char *)buf;
  token->end = (unsigned char *)buf + strlen(buf);
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  /* Publish the state only once it is completely initialized. */
  user_data->ptr = token;
  return GRN_SUCCESS;
}
/*
 * Create a snippet object.
 *
 * flags:       stored as is; GRN_SNIP_COPY_TAG is forwarded to
 *              grn_snip_set_default_tag() for both default tags, and
 *              GRN_SNIP_NORMALIZE selects GRN_NORMALIZER_AUTO as the
 *              normalizer (otherwise none).
 * width:       stored as is.
 * max_results: must be in 1..MAX_SNIP_RESULT_COUNT.
 * defaultopentag / defaultclosetag (+ lengths): default tags.
 * mapping:     stored as is.
 *
 * The finished object is registered in the context's database under a
 * nameless id.
 *
 * Returns the snippet, or NULL when the allocation fails, max_results is
 * out of range, or one of the default tags cannot be set.
 */
grn_snip *
grn_snip_open(grn_ctx *ctx, int flags, unsigned int width,
              unsigned int max_results,
              const char *defaultopentag, unsigned int defaultopentag_len,
              const char *defaultclosetag, unsigned int defaultclosetag_len,
              grn_snip_mapping *mapping)
{
  int copy_tag;
  grn_snip *ret = NULL;
  grn_obj *db;
  grn_id id;

  ret = GRN_MALLOC(sizeof(grn_snip));
  if (!ret) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_snip allocation failed on grn_snip_open");
    return NULL;
  }
  if (max_results == 0 || max_results > MAX_SNIP_RESULT_COUNT) {
    GRN_LOG(ctx, GRN_LOG_WARNING, "max_results is invalid on grn_snip_open");
    GRN_FREE(ret);
    return NULL;
  }

  GRN_API_ENTER;
  ret->encoding = ctx->encoding;
  ret->flags = flags;
  ret->width = width;
  ret->max_results = max_results;
  ret->defaultopentag = NULL;
  ret->defaultclosetag = NULL;

  copy_tag = flags & GRN_SNIP_COPY_TAG;
  if (grn_snip_set_default_tag(ctx,
                               &(ret->defaultopentag),
                               &(ret->defaultopentag_len),
                               defaultopentag, defaultopentag_len,
                               copy_tag)) {
    GRN_FREE(ret);
    GRN_API_RETURN(NULL);
  }
  if (grn_snip_set_default_tag(ctx,
                               &(ret->defaultclosetag),
                               &(ret->defaultclosetag_len),
                               defaultclosetag, defaultclosetag_len,
                               copy_tag)) {
    /* The open tag is released here only when it was copied. */
    if (copy_tag && ret->defaultopentag) {
      GRN_FREE((void *)ret->defaultopentag);
    }
    GRN_FREE(ret);
    GRN_API_RETURN(NULL);
  }

  ret->cond_len = 0;
  ret->mapping = mapping;
  ret->nstr = NULL;
  ret->tag_count = 0;
  ret->snip_count = 0;
  ret->normalizer = (ret->flags & GRN_SNIP_NORMALIZE) ? GRN_NORMALIZER_AUTO : NULL;

  GRN_DB_OBJ_SET_TYPE(ret, GRN_SNIP);
  db = grn_ctx_db(ctx);
  id = grn_obj_register(ctx, db, NULL, 0);
  DB_OBJ(ret)->header.domain = GRN_ID_NIL;
  DB_OBJ(ret)->range = GRN_ID_NIL;
  grn_db_obj_init(ctx, db, id, DB_OBJ(ret));

  GRN_API_RETURN(ret);
}
/*
  Tokenizer initialization callback.

  It is invoked both for full text search queries and for documents that
  are being indexed, so the input string may be short or long.

  The return value is ignored by the caller.  Errors are signalled by
  overwriting `ctx->rc' with an error code (not GRN_SUCCESS).

  The shared MeCab instance is created on first use, guarded by
  sole_mecab_lock.  The table encoding has to match the encoding of the
  MeCab dictionary.  The MeCab output is copied into a private buffer
  while the lock is held, and user_data->ptr is set only after everything
  has succeeded.
 */
static grn_obj *
mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *str;
  int nflags = 0;
  char *buf, *p;
  const char *s;
  grn_obj *table = args[0];
  grn_obj_flags table_flags;
  grn_encoding table_encoding;
  grn_mecab_tokenizer *token;
  unsigned int bufsize, len;

  str = grn_ctx_pop(ctx);
  if (!str) {
    ERR(GRN_INVALID_ARGUMENT, "missing argument");
    return NULL;
  }

  /* Create the shared instance on demand; re-check once the lock is held. */
  if (!sole_mecab) {
    CRITICAL_SECTION_ENTER(sole_mecab_lock);
    if (!sole_mecab) {
      sole_mecab = mecab_new2("-Owakati");
      if (sole_mecab) {
        sole_mecab_encoding = get_mecab_encoding(sole_mecab);
      } else {
        ERR(GRN_TOKENIZER_ERROR, "mecab_new2 failed on grn_mecab_init: %s",
            mecab_strerror(NULL));
      }
    }
    CRITICAL_SECTION_LEAVE(sole_mecab_lock);
  }
  if (!sole_mecab) {
    return NULL;
  }

  grn_table_get_info(ctx, table, &table_flags, &table_encoding, NULL);
  if (table_encoding != sole_mecab_encoding) {
    ERR(GRN_TOKENIZER_ERROR,
        "MeCab dictionary charset (%s) does not match the context encoding: <%s>",
        grn_enctostr(sole_mecab_encoding), grn_enctostr(table_encoding));
    return NULL;
  }

  token = GRN_MALLOC(sizeof(grn_mecab_tokenizer));
  if (!token) {
    return NULL;
  }
  token->mecab = sole_mecab;
  token->encoding = table_encoding;
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);

  token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                              nflags, token->encoding);
  if (!token->nstr) {
    GRN_FREE(token);
    ERR(GRN_TOKENIZER_ERROR, "grn_str_open failed at grn_token_open");
    return NULL;
  }
  len = token->nstr->norm_blen;

  CRITICAL_SECTION_ENTER(sole_mecab_lock);
  s = mecab_sparse_tostr2(token->mecab, token->nstr->norm, len);
  if (s) {
    /* Copy the result (terminator included) before releasing the lock. */
    bufsize = strlen(s) + 1;
    buf = GRN_MALLOC(bufsize);
    if (buf) {
      memcpy(buf, s, bufsize);
    } else {
      GRN_LOG(ctx, GRN_LOG_ALERT, "buffer allocation on mecab_init failed !");
    }
  } else {
    ERR(GRN_TOKENIZER_ERROR, "mecab_sparse_tostr failed len=%d err=%s",
        len, mecab_strerror(token->mecab));
  }
  CRITICAL_SECTION_LEAVE(sole_mecab_lock);

  /* `buf' is only examined when `s' is set, i.e. when it was assigned. */
  if (!s || !buf) {
    grn_str_close(ctx, token->nstr);
    GRN_FREE(token);
    return NULL;
  }

  /* A certain version of mecab returns trailing lf or spaces. */
  p = buf + bufsize - 2;
  while (buf <= p && isspace(*(unsigned char *)p)) {
    *p = '\0';
    p--;
  }

  user_data->ptr = token;
  token->buf = buf;
  token->next = buf;
  token->end = p + 1;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  return NULL;
}
/*
  Client driver: reads one request per line from stdin and sends each line
  to a destination, then reports timing statistics.

  NOTE: only the first part of this function is visible here; the text stops
  inside the `else' branch that follows a failed THREAD_CREATE().

  Visible flow:
    - Initializes a local grn_ctx, the `fsessions' queue and the `sessions'
      hash (keyed by grn_sock, holding `session' values).
    - Fills `sheader' with GRN_COM_PROTO_GQTP and zeroes for all other fields.
    - Allocates a BUFSIZE line buffer, initializes the event object `ev' with
      room for 1000 events, installs `msg_handler' and starts the `receiver'
      thread.
    - For every line read with fgets(): allocates a session for
      dests[cnt % dest_cnt] (destinations are used in rotation), stamps it
      with the current time and sends the line without its last byte.
      When `proto' is 'H' or 'h' the line is sent with grn_com_send_text()
      and the session's `stat' is set to 2; otherwise it is sent with
      grn_com_send() using `sheader'.
    - After each line, sleeps in 1 ms steps until both limits allow another
      send: nrecv < max_tp * elapsed_seconds and (nsent - nrecv) < max_con.
    - Logs progress every 1000 sent requests.
    - At end of input sets `done' to 1, joins the receiver thread, logs
      n/min/max/avg/qps/elapsed time, closes every session in `sessions' and
      sets rc to 0.

  NOTE(review): `strlen(buf) - 1' assumes the line is not empty; a line that
  begins with a NUL byte would make `size' wrap around -- confirm the input
  is trusted.
  NOTE(review): `etime_amount / nsent' divides by zero when no line was
  read -- confirm whether empty input is possible.
  NOTE(review): `etime' is uint64_t but is printed with "%d"/"%06d" --
  confirm how lprint() handles its variadic arguments.
  NOTE(review): `done' is presumably polled by the receiver thread; that
  code is not visible here.
*/
static int do_client() { int rc = -1; char *buf; grn_thread thread; struct timeval tvb, tve; grn_com_header sheader; grn_ctx ctx_, *ctx = &ctx_; grn_ctx_init(ctx, 0); GRN_COM_QUEUE_INIT(&fsessions); sessions = grn_hash_create(ctx, NULL, sizeof(grn_sock), sizeof(session), 0); sheader.proto = GRN_COM_PROTO_GQTP; sheader.qtype = 0; sheader.keylen = 0; sheader.level = 0; sheader.flags = 0; sheader.status = 0; sheader.opaque = 0; sheader.cas = 0; if ((buf = GRN_MALLOC(BUFSIZE))) { if (!grn_com_event_init(ctx, &ev, 1000, sizeof(grn_com))) { ev.msg_handler = msg_handler; if (!THREAD_CREATE(thread, receiver, NULL)) { int cnt = 0; gettimeofday(&tvb, NULL); lprint(ctx, "begin: max_concurrency=%d max_tp=%d", max_con, max_tp); while (fgets(buf, BUFSIZE, stdin)) { uint32_t size = strlen(buf) - 1; session *s = session_alloc(ctx, dests + (cnt++ % dest_cnt)); if (s) { gettimeofday(&s->tv, NULL); s->n_query++; s->query_id = ++nsent; s->n_sessions = (nsent - nrecv); switch (proto) { case 'H' : case 'h' : if (grn_com_send_text(ctx, s->com, buf, size, 0)) { fprintf(stderr, "grn_com_send_text failed\n"); } s->stat = 2; /* lprint(ctx, "sent %04d %04d %d", s->n_query, s->query_id, s->com->fd); */ break; default : if (grn_com_send(ctx, s->com, &sheader, buf, size, 0)) { fprintf(stderr, "grn_com_send failed\n"); } break; } } else { fprintf(stderr, "grn_com_copen failed\n"); } /* throttle: wait until both the throughput limit and the concurrency limit allow another send */ for (;;) { gettimeofday(&tve, NULL); if ((nrecv < max_tp * (tve.tv_sec - tvb.tv_sec)) && (nsent - nrecv) < max_con) { break; } /* lprint(ctx, "s:%d r:%d", nsent, nrecv); */ usleep(1000); } if (!(nsent % 1000)) { lprint(ctx, " : %d", nsent); } } done = 1; pthread_join(thread, NULL); gettimeofday(&tve, NULL); { double qps; uint64_t etime = (tve.tv_sec - tvb.tv_sec); etime *= 1000000; etime += (tve.tv_usec - tvb.tv_usec); qps = (double)nsent * 1000000 / etime; lprint(ctx, "end : n=%d min=%d max=%d avg=%d qps=%f etime=%d.%06d", nsent, etime_min, etime_max, (int)(etime_amount / nsent), qps, etime / 1000000, etime % 
1000000); } { session *s; GRN_HASH_EACH(ctx, sessions, id, NULL, NULL, &s, { session_close(ctx, s); }); } rc = 0; } else {
/*
  Tokenizer "init" callback for the regexp tokenizer.

  Opens the tokenizer query built from `args', allocates the tokenizer state
  and stores it in `user_data->ptr'. The scan range [next, end) is set to the
  normalized query text.

  In GRN_TOKEN_GET mode a leading "\A" and a trailing "\z" in the raw query
  are recognized as begin/end anchors: the matching flag in `get' is raised
  and the anchor is cut off from the scan range.

  Always returns NULL. When the query cannot be opened nothing else is done;
  when the state cannot be allocated the query is closed and
  GRN_NO_MEMORY_AVAILABLE is reported.
*/
static grn_obj *
regexp_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  const unsigned int flags_for_normalize = 0;
  grn_tokenizer_query *opened_query;
  grn_regexp_tokenizer *self;
  const char *text;
  unsigned int text_bytes;

  opened_query = grn_tokenizer_query_open(ctx, nargs, args,
                                          flags_for_normalize);
  if (!opened_query) {
    return NULL;
  }

  self = GRN_MALLOC(sizeof(grn_regexp_tokenizer));
  if (!self) {
    grn_tokenizer_query_close(ctx, opened_query);
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[tokenizer][regexp] failed to allocate memory");
    return NULL;
  }
  user_data->ptr = self;

  grn_tokenizer_token_init(ctx, &(self->token));
  self->query = opened_query;

  /* State used only in GRN_TOKEN_GET mode. */
  self->get.have_begin = GRN_FALSE;
  self->get.have_end = GRN_FALSE;
  self->get.n_skip_tokens = 0;

  /* Position flags for the token stream. */
  self->is_begin = GRN_TRUE;
  self->is_end = GRN_FALSE;
  self->is_first_token = GRN_TRUE;
  self->is_overlapping = GRN_FALSE;

  grn_string_get_normalized(ctx, self->query->normalized_query,
                            &text, &text_bytes, NULL);
  self->next = text;
  self->end = self->next + text_bytes;

  if (self->query->tokenize_mode == GRN_TOKEN_GET &&
      self->query->length >= 2) {
    const unsigned int raw_length = self->query->length;
    const char *raw = self->query->ptr;
    grn_encoding encoding = self->query->encoding;
    int n;

    if (raw[0] == '\\' && raw[1] == 'A') {
      self->get.have_begin = GRN_TRUE;
      /* TODO: It assumes that both "\\" and "A" are normalized to
         1 character each. Normalizer may omit character or expand to
         multiple characters. */
      for (n = 0; n < 2; n++) {
        self->next += grn_charlen_(ctx, self->next, self->end, encoding);
      }
    }

    if (raw[raw_length - 2] == '\\' && raw[raw_length - 1] == 'z') {
      self->get.have_end = GRN_TRUE;
      /* TODO: It assumes that both "\\" and "z" are normalized to
         1 byte characters. Normalizer may omit character or expand to
         multiple characters. */
      for (n = 0; n < 2; n++) {
        self->end -= grn_charlen_(ctx, self->end - 1, self->end, encoding);
      }
    }
  }

  GRN_TEXT_INIT(&(self->buffer), 0);

  return NULL;
}
/*
  Fills in the "normalized" members of `string' without real normalization.

  The normalized text is a copy of the original text. When
  GRN_STRING_REMOVE_TOKENIZED_DELIMITER is set and the context encoding is
  UTF-8, characters recognized as tokenized delimiters are left out of the
  copy.

  When GRN_STRING_WITH_CHECKS is set, a `checks' array with one entry per
  original byte is built: the first byte of each character holds the
  character's byte length (guessed from the lead byte according to the
  string's encoding) and the other bytes hold 0.

  Returns `string' on success. On allocation failure the string is closed,
  GRN_NO_MEMORY_AVAILABLE is reported and NULL is returned.
*/
static grn_string *
grn_fake_string_open(grn_ctx *ctx, grn_string *string)
{
  /* TODO: support GRN_STRING_REMOVE_BLANK flag and ctypes */
  grn_string *self = string;
  const char *src = self->original;
  unsigned int src_len = self->original_length_in_bytes;

  self->normalized = GRN_MALLOC(src_len + 1);
  if (!self->normalized) {
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[strinig][fake] failed to allocate normalized text space");
    grn_string_close(ctx, (grn_obj *)self);
    return NULL;
  }

  if (!(self->flags & GRN_STRING_REMOVE_TOKENIZED_DELIMITER) ||
      ctx->encoding != GRN_ENC_UTF8) {
    /* Plain copy. */
    memcpy(self->normalized, src, src_len);
    self->normalized[src_len] = '\0';
    self->normalized_length_in_bytes = src_len;
  } else {
    /* Copy character by character, dropping tokenized delimiters. */
    const char *cursor = src;
    const char *limit = src + src_len;
    char *out = self->normalized;
    unsigned int out_len = 0;
    int n_bytes;

    for (;;) {
      n_bytes = grn_charlen(ctx, cursor, limit);
      if (n_bytes <= 0) {
        break;
      }
      if (!grn_tokenizer_is_tokenized_delimiter(ctx, cursor, n_bytes,
                                                ctx->encoding)) {
        memcpy(out, cursor, n_bytes);
        out += n_bytes;
        out_len += n_bytes;
      }
      cursor += n_bytes;
    }
    self->normalized[out_len] = '\0';
    self->normalized_length_in_bytes = out_len;
  }

  if (!(self->flags & GRN_STRING_WITH_CHECKS)) {
    return self;
  }

  self->checks = (int16_t *)GRN_MALLOC(sizeof(int16_t) * src_len);
  if (!self->checks) {
    grn_string_close(ctx, (grn_obj *)self);
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[strinig][fake] failed to allocate checks space");
    return NULL;
  }

  {
    /* Number of bytes still belonging to the current character. */
    int16_t pending = 0;
    size_t pos;

    for (pos = 0; pos < src_len; pos++) {
      if (pending) {
        /* Continuation byte of a multi-byte character. */
        self->checks[pos] = 0;
      } else {
        unsigned char lead = (unsigned char)src[pos];

        switch (self->encoding) {
        case GRN_ENC_EUC_JP:
          if ((lead >= 0xa1U && lead <= 0xfeU) || lead == 0x8eU) {
            pending = 2;
          } else if (lead == 0x8fU) {
            pending = 3;
          } else {
            pending = 1;
          }
          break;
        case GRN_ENC_SJIS:
          if (lead >= 0x81U &&
              (lead <= 0x9fU || (lead >= 0xe0U && lead <= 0xfcU))) {
            pending = 2;
          } else {
            pending = 1;
          }
          break;
        case GRN_ENC_UTF8:
          if (!(lead & 0x80U)) {
            pending = 1;
          } else if (!(lead & 0x20U)) {
            pending = 2;
          } else if (!(lead & 0x10U)) {
            pending = 3;
          } else {
            pending = 4;
          }
          break;
        default:
          /* Every byte is treated as its own character. */
          pending = 1;
          break;
        }
        self->checks[pos] = pending;
      }
      pending--;
    }
  }

  return self;
}