/*
 * Reads a phrase starting at q->cur and returns it as a token.
 *
 * Characters are compacted in place: an escape character followed by at
 * least one more byte is dropped and the character after it is copied
 * as-is. Scanning stops at an unescaped GRN_QUERY_QUOTER (q->cur is moved
 * past it) or at q->str_end (q->cur is set there).
 *
 * Returns NULL, leaving q->cur untouched, when grn_charlen() reports a
 * zero-length (malformed) character at the start of a loop iteration.
 */
inline static grn_cell *
get_phrase(grn_ctx *ctx, grn_query *q)
{
  char *const phrase_head = q->cur;
  char *src = phrase_head;
  char *dst = phrase_head;

  for (;;) {
    unsigned int nbytes;

    if (src >= q->str_end) {
      /* Ran out of input before a closing quoter was seen. */
      q->cur = src;
      break;
    }

    nbytes = grn_charlen(ctx, src, q->str_end);
    if (nbytes == 0) {
      /* invalid string containing malformed multibyte char */
      return NULL;
    }

    if (nbytes == 1) {
      if (*src == GRN_QUERY_QUOTER) {
        q->cur = src + 1;
        break;
      }
      if (*src == GRN_QUERY_ESCAPE && src + 1 < q->str_end) {
        /* Skip the escape itself and take the next character literally. */
        src++;
        nbytes = grn_charlen(ctx, src, q->str_end);
      }
    }

    /* dst never runs ahead of src, so the in-place copy is safe. */
    for (; nbytes > 0; nbytes--) {
      *dst++ = *src++;
    }
  }

  return token_new(q, phrase_head, dst);
}
/*
 * Reads a bare word starting at q->cur and returns it as a token.
 *
 * The word ends at:
 *   - a position where grn_charlen() returns 0 (q->cur jumps to q->str_end),
 *   - a space character or GRN_QUERY_PARENR (q->cur stays on it), or
 *   - GRN_QUERY_PREFIX, which sets *prefixp to 1 and is consumed.
 * In every case the token covers the bytes from the start up to, but not
 * including, the stopping position.
 */
inline static grn_cell *
get_word(grn_ctx *ctx, grn_query *q, int *prefixp)
{
  char *const word_head = q->cur;
  char *scan = word_head;

  while (1) {
    /* null check and length check */
    const unsigned int nbytes = grn_charlen(ctx, scan, q->str_end);

    if (nbytes == 0) {
      q->cur = q->str_end;
      break;
    }
    if (grn_isspace(scan, q->encoding) || *scan == GRN_QUERY_PARENR) {
      q->cur = scan;
      break;
    }
    if (*scan == GRN_QUERY_PREFIX) {
      *prefixp = 1;
      q->cur = scan + 1;
      break;
    }
    scan += nbytes;
  }

  return token_new(q, word_head, scan);
}
/*
 * Advances q->cur past consecutive space characters.
 *
 * If grn_charlen() returns 0 for a position that grn_isspace() accepted,
 * q->cur is moved straight to q->str_end.
 */
inline static void
skip_space(grn_ctx *ctx, grn_query *q)
{
  for (;;) {
    unsigned int nbytes;

    if (q->cur >= q->str_end) {
      return;
    }
    if (!grn_isspace(q->cur, q->encoding)) {
      return;
    }

    /* null check and length check */
    nbytes = grn_charlen(ctx, q->cur, q->str_end);
    if (nbytes == 0) {
      q->cur = q->str_end;
      return;
    }
    q->cur += nbytes;
  }
}
/*
 * Computes the character-wise edit distance between [sx, ex) and [sy, ey).
 *
 * Characters are delimited with grn_charlen(); each string is only
 * considered up to the first position where grn_charlen() returns 0.
 * Insertions, deletions and substitutions cost 1. When `flags` contains
 * GRN_TABLE_FUZZY_SEARCH_WITH_TRANSPOSITION, swapping two adjacent
 * characters also costs 1.
 *
 * Returns the distance, or 0 when the work table cannot be allocated.
 *
 * Note: the DIST() macro is defined outside this function; the local names
 * `dists` and `lx` are kept because it presumably refers to them.
 */
static uint32_t
calc_edit_distance(grn_ctx *ctx, char *sx, char *ex, char *sy, char *ey, int flags)
{
  uint32_t d = 0;
  uint32_t cx, lx, cy, ly, *dists;
  char *px, *py;

  /* Count the characters of both strings. */
  for (px = sx, lx = 0; px < ex && (cx = grn_charlen(ctx, px, ex)); px += cx, lx++) {
  }
  for (py = sy, ly = 0; py < ey && (cy = grn_charlen(ctx, py, ey)); py += cy, ly++) {
  }

  /* Do the size arithmetic in size_t so it cannot wrap in 32 bits. */
  dists = GRN_PLUGIN_MALLOC(ctx,
                            (size_t)(lx + 1) * (ly + 1) * sizeof(uint32_t));
  if (dists) {
    uint32_t x, y;
    /* Start and byte length of the previous character of the x string. */
    char *prev_px = NULL;
    uint32_t prev_cx = 0;

    for (x = 0; x <= lx; x++) { DIST(x, 0) = x; }
    for (y = 0; y <= ly; y++) { DIST(0, y) = y; }

    for (x = 1, px = sx; x <= lx; x++, px += cx) {
      /* Start and byte length of the previous character of the y string. */
      char *prev_py = NULL;
      uint32_t prev_cy = 0;

      cx = grn_charlen(ctx, px, ex);
      for (y = 1, py = sy; y <= ly; y++, py += cy) {
        cy = grn_charlen(ctx, py, ey);
        if (cx == cy && !memcmp(px, py, cx)) {
          DIST(x, y) = DIST(x - 1, y - 1);
        } else {
          uint32_t a = DIST(x - 1, y) + 1;
          uint32_t b = DIST(x, y - 1) + 1;
          uint32_t c = DIST(x - 1, y - 1) + 1;
          DIST(x, y) = ((a < b) ? ((a < c) ? a : c) : ((b < c) ? b : c));
          /*
           * Transposition: the current x character equals the previous y
           * character and the previous x character equals the current y
           * character. The previous characters are addressed through their
           * recorded start and length; stepping back by the *current*
           * character's length would read misaligned bytes, or bytes in
           * front of the buffer, when adjacent characters differ in width.
           */
          if ((flags & GRN_TABLE_FUZZY_SEARCH_WITH_TRANSPOSITION) &&
              x > 1 && y > 1 &&
              cx == prev_cy && memcmp(px, prev_py, cx) == 0 &&
              prev_cx == cy && memcmp(prev_px, py, cy) == 0) {
            uint32_t t = DIST(x - 2, y - 2) + 1;
            DIST(x, y) = ((DIST(x, y) < t) ? DIST(x, y) : t);
          }
        }
        prev_py = py;
        prev_cy = cy;
      }
      prev_px = px;
      prev_cx = cx;
    }
    d = DIST(lx, ly);
    GRN_PLUGIN_FREE(ctx, dists);
  }
  return d;
}
/*
 * Data-driven test: grn_charlen() must return 0 for a broken input.
 *
 * Test data fields read here:
 *   "encoding"     - encoding set on the context and used for conversion
 *   "input"        - source text, converted with convert_encoding()
 *   "input-length" - number of bytes to expose; a negative value means the
 *                    whole converted string (measured with strlen())
 */
void
test_charlen_broken(gconstpointer data)
{
  grn_encoding target_encoding;
  const gchar *source_text;
  gint exposed_bytes;
  const gchar *encoded_input;
  const gchar *encoded_input_end;

  target_encoding = gcut_data_get_int(data, "encoding");
  GRN_CTX_SET_ENCODING(&context, target_encoding);

  source_text = gcut_data_get_string(data, "input");
  exposed_bytes = gcut_data_get_int(data, "input-length");
  encoded_input = convert_encoding(source_text, target_encoding);

  if (exposed_bytes < 0) {
    exposed_bytes = strlen(encoded_input);
  }
  encoded_input_end = encoded_input + exposed_bytes;

  cut_assert_equal_uint(0, grn_charlen(&context, encoded_input, encoded_input_end));
}
static grn_rc selector_fuzzy_search(grn_ctx *ctx, grn_obj *table, grn_obj *index, int nargs, grn_obj **args, grn_obj *res, grn_operator op) { grn_rc rc = GRN_SUCCESS; grn_obj *target = NULL; grn_obj *obj; grn_obj *query; uint32_t max_distance = 1; uint32_t prefix_length = 0; uint32_t prefix_match_size = 0; uint32_t max_expansion = 0; int flags = 0; grn_bool use_sequential_search = GRN_FALSE; if ((nargs - 1) < 2) { GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "fuzzy_search(): wrong number of arguments (%d ...)", nargs - 1); rc = ctx->rc; goto exit; } obj = args[1]; query = args[2]; if (nargs == 4) { grn_obj *options = args[3]; grn_hash_cursor *cursor; void *key; grn_obj *value; int key_size; if (options->header.type != GRN_TABLE_HASH_KEY) { GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "fuzzy_search(): " "3rd argument must be object literal: <%.*s>", (int)GRN_TEXT_LEN(options), GRN_TEXT_VALUE(options)); goto exit; } cursor = grn_hash_cursor_open(ctx, (grn_hash *)options, NULL, 0, NULL, 0, 0, -1, 0); if (!cursor) { GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE, "fuzzy_search(): couldn't open cursor"); goto exit; } while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) { grn_hash_cursor_get_key_value(ctx, cursor, &key, &key_size, (void **)&value); if (key_size == 12 && !memcmp(key, "max_distance", 12)) { max_distance = GRN_UINT32_VALUE(value); } else if (key_size == 13 && !memcmp(key, "prefix_length", 13)) { prefix_length = GRN_UINT32_VALUE(value); } else if (key_size == 13 && !memcmp(key, "max_expansion", 13)) { max_expansion = GRN_UINT32_VALUE(value); } else if (key_size == 18 && !memcmp(key, "with_transposition", 18)) { if (GRN_BOOL_VALUE(value)) { flags |= GRN_TABLE_FUZZY_SEARCH_WITH_TRANSPOSITION; } } else { GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "invalid option name: <%.*s>", key_size, (char *)key); grn_hash_cursor_close(ctx, cursor); goto exit; } } grn_hash_cursor_close(ctx, cursor); } if (index) { target = index; } else { if (obj->header.type == 
GRN_COLUMN_INDEX) { target = obj; } else { grn_column_index(ctx, obj, GRN_OP_FUZZY, &target, 1, NULL); } } if (target) { grn_obj *lexicon; use_sequential_search = GRN_TRUE; lexicon = grn_ctx_at(ctx, target->header.domain); if (lexicon) { if (lexicon->header.type == GRN_TABLE_PAT_KEY) { use_sequential_search = GRN_FALSE; } grn_obj_unlink(ctx, lexicon); } } else { if (grn_obj_is_key_accessor(ctx, obj) && table->header.type == GRN_TABLE_PAT_KEY) { target = table; } else { use_sequential_search = GRN_TRUE; } } if (prefix_length) { const char *s = GRN_TEXT_VALUE(query); const char *e = GRN_BULK_CURR(query); const char *p; unsigned int cl = 0; unsigned int length = 0; for (p = s; p < e && (cl = grn_charlen(ctx, p, e)); p += cl) { length++; if (length > prefix_length) { break; } } prefix_match_size = p - s; } if (use_sequential_search) { rc = sequential_fuzzy_search(ctx, table, obj, query, max_distance, prefix_match_size, max_expansion, flags, res, op); goto exit; } if (!target) { grn_obj inspected; GRN_TEXT_INIT(&inspected, 0); grn_inspect(ctx, &inspected, target); GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "fuzzy_search(): " "column must be COLUMN_INDEX or TABLE_PAT_KEY: <%.*s>", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); rc = ctx->rc; GRN_OBJ_FIN(ctx, &inspected); } else { grn_search_optarg options = {0}; options.mode = GRN_OP_FUZZY; options.fuzzy.prefix_match_size = prefix_match_size; options.fuzzy.max_distance = max_distance; options.fuzzy.max_expansion = max_expansion; options.fuzzy.flags = flags; grn_obj_search(ctx, target, query, res, op, &options); } exit : return rc; }
/*
 * Reports whether `sub_text` occurs in `text`.
 *
 * Returns GRN_FALSE when sub_text is empty or longer than text. When
 * GRN_SUPPORT_REGEXP is defined and grn_onigmo_is_valid_encoding() accepts
 * the context, the search is delegated to Onigmo with sub_text compiled
 * under ONIG_SYNTAX_ASIS. Otherwise a character-by-character scan based on
 * grn_charlen() is used; that scan returns GRN_FALSE as soon as
 * grn_charlen() returns 0 for either string.
 */
static grn_bool
string_have_sub_text(grn_ctx *ctx,
                     const char *text, unsigned int text_len,
                     const char *sub_text, unsigned int sub_text_len)
{
  if (sub_text_len == 0) {
    return GRN_FALSE;
  }

  if (sub_text_len > text_len) {
    return GRN_FALSE;
  }

#ifdef GRN_SUPPORT_REGEXP
  if (grn_onigmo_is_valid_encoding(ctx)) {
    OnigRegex regex;
    grn_bool matched;

    regex = grn_onigmo_new(ctx,
                           sub_text,
                           sub_text_len,
                           GRN_ONIGMO_OPTION_DEFAULT,
                           ONIG_SYNTAX_ASIS,
                           "[operator]");
    if (!regex) {
      return GRN_FALSE;
    }

    matched = regexp_is_match(ctx, regex, text, text_len);
    onig_free(regex);
    return matched;
  }
#endif /* GRN_SUPPORT_REGEXP */

  {
    const char *text_end = text + text_len;
    const char *sub_text_end = sub_text + sub_text_len;
    const char *match_start = text;

    /*
     * Try a match at every character boundary. After a failed attempt the
     * scan restarts at the character following the attempt's start, not at
     * the character where the mismatch happened; otherwise overlapping
     * candidates are skipped (text "aaab", sub_text "aab").
     */
    while (match_start < text_end &&
           (unsigned int)(text_end - match_start) >= sub_text_len) {
      const char *text_current = match_start;
      const char *sub_text_current = sub_text;
      int match_start_char_len = 0;

      while (text_current < text_end) {
        int text_char_len;
        int sub_text_char_len;

        text_char_len = grn_charlen(ctx, text_current, text_end);
        if (text_char_len == 0) {
          return GRN_FALSE;
        }
        if (text_current == match_start) {
          match_start_char_len = text_char_len;
        }

        sub_text_char_len = grn_charlen(ctx, sub_text_current, sub_text_end);
        if (sub_text_char_len == 0) {
          return GRN_FALSE;
        }

        if (text_char_len != sub_text_char_len ||
            memcmp(text_current, sub_text_current, text_char_len) != 0) {
          break;
        }

        text_current += text_char_len;
        sub_text_current += sub_text_char_len;
        if (sub_text_current == sub_text_end) {
          return GRN_TRUE;
        }
      }

      match_start += match_start_char_len;
    }

    return GRN_FALSE;
  }
}
/*
 * Fills in the "normalized" side of `string` without real normalization.
 *
 * The normalized text is a NUL-terminated copy of the original. When
 * GRN_STRING_REMOVE_TOKENIZED_DELIMITER is set and the context encoding is
 * UTF-8, characters recognised by grn_tokenizer_is_tokenized_delimiter()
 * are left out of the copy, and copying stops at the first position where
 * grn_charlen() does not return a positive length.
 *
 * When GRN_STRING_WITH_CHECKS is set, a checks array with one entry per
 * byte of the ORIGINAL text is built: the first byte of each character
 * holds the character's byte width (derived from the lead byte for EUC-JP,
 * Shift_JIS and UTF-8, 1 for any other encoding) and the remaining bytes
 * hold 0.
 *
 * Returns `string`, or NULL after closing it when an allocation fails.
 */
static grn_string *
grn_fake_string_open(grn_ctx *ctx, grn_string *string)
{
  /* TODO: support GRN_STRING_REMOVE_BLANK flag and ctypes */
  grn_string *nstr = string;
  const char *str = nstr->original;
  unsigned int str_len = nstr->original_length_in_bytes;

  nstr->normalized = GRN_MALLOC(str_len + 1);
  if (!nstr->normalized) {
    ERR(GRN_NO_MEMORY_AVAILABLE,
        "[strinig][fake] failed to allocate normalized text space");
    grn_string_close(ctx, (grn_obj *)nstr);
    return NULL;
  }

  if ((nstr->flags & GRN_STRING_REMOVE_TOKENIZED_DELIMITER) &&
      ctx->encoding == GRN_ENC_UTF8) {
    const char *src = str;
    const char *src_end = str + str_len;
    char *dst = nstr->normalized;
    unsigned int copied = 0;

    for (;;) {
      int char_length = grn_charlen(ctx, src, src_end);
      if (char_length <= 0) {
        break;
      }
      if (!grn_tokenizer_is_tokenized_delimiter(ctx,
                                                src, char_length,
                                                ctx->encoding)) {
        memcpy(dst, src, char_length);
        dst += char_length;
        copied += char_length;
      }
      src += char_length;
    }
    nstr->normalized[copied] = '\0';
    nstr->normalized_length_in_bytes = copied;
  } else {
    memcpy(nstr->normalized, str, str_len);
    nstr->normalized[str_len] = '\0';
    nstr->normalized_length_in_bytes = str_len;
  }

  if (nstr->flags & GRN_STRING_WITH_CHECKS) {
    /* Bytes still to be marked for the character currently being walked. */
    int16_t pending = 0;
    size_t i;

    nstr->checks = (int16_t *) GRN_MALLOC(sizeof(int16_t) * str_len);
    if (!nstr->checks) {
      grn_string_close(ctx, (grn_obj *)nstr);
      ERR(GRN_NO_MEMORY_AVAILABLE,
          "[strinig][fake] failed to allocate checks space");
      return NULL;
    }

    for (i = 0; i < str_len; i++) {
      if (pending == 0) {
        const unsigned char lead = (unsigned char) str[i];

        switch (nstr->encoding) {
        case GRN_ENC_EUC_JP:
          if ((lead >= 0xa1U && lead <= 0xfeU) || lead == 0x8eU) {
            pending = 2;
          } else if (lead == 0x8fU) {
            pending = 3;
          } else {
            pending = 1;
          }
          break;
        case GRN_ENC_SJIS:
          if (lead >= 0x81U &&
              (lead <= 0x9fU || (lead >= 0xe0U && lead <= 0xfcU))) {
            pending = 2;
          } else {
            pending = 1;
          }
          break;
        case GRN_ENC_UTF8:
          if (!(lead & 0x80U)) {
            pending = 1;
          } else if (!(lead & 0x20U)) {
            pending = 2;
          } else if (!(lead & 0x10U)) {
            pending = 3;
          } else {
            pending = 4;
          }
          break;
        default:
          /* Every byte counts as its own character. */
          pending = 1;
          break;
        }
        nstr->checks[i] = pending;
      } else {
        nstr->checks[i] = 0;
      }
      pending--;
    }
  }

  return nstr;
}
static void json_read(grn_ctx *ctx, grn_loader *loader, const char *str, unsigned int str_len) { const char *const beg = str; char c; int len; const char *se = str + str_len; while (str < se) { c = *str; switch (loader->stat) { case GRN_LOADER_BEGIN : if ((len = grn_isspace(str, ctx->encoding))) { str += len; continue; } switch (c) { case '[' : JSON_READ_OPEN_BRACKET(); break; case '{' : JSON_READ_OPEN_BRACE(); break; default : ERR(GRN_INVALID_ARGUMENT, "JSON must start with '[' or '{': <%.*s>", str_len, beg); loader->stat = GRN_LOADER_END; break; } break; case GRN_LOADER_TOKEN : if ((len = grn_isspace(str, ctx->encoding))) { str += len; continue; } switch (c) { case '"' : loader->stat = GRN_LOADER_STRING; values_add(ctx, loader); str++; break; case '[' : JSON_READ_OPEN_BRACKET(); break; case '{' : JSON_READ_OPEN_BRACE(); break; case ':' : str++; break; case ',' : str++; break; case ']' : bracket_close(ctx, loader); loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END; if (ctx->rc == GRN_CANCEL) { loader->stat = GRN_LOADER_END; } str++; break; case '}' : brace_close(ctx, loader); loader->stat = GRN_BULK_VSIZE(&loader->level) ? 
GRN_LOADER_TOKEN : GRN_LOADER_END; if (ctx->rc == GRN_CANCEL) { loader->stat = GRN_LOADER_END; } str++; break; case '+' : case '-' : case '0' : case '1' : case '2' : case '3' : case '4' : case '5' : case '6' : case '7' : case '8' : case '9' : loader->stat = GRN_LOADER_NUMBER; values_add(ctx, loader); break; default : if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('_' == c)) { loader->stat = GRN_LOADER_SYMBOL; values_add(ctx, loader); } else { if ((len = grn_charlen(ctx, str, se))) { GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char('%c') at", c); GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg) + len, beg); GRN_LOG(ctx, GRN_LOG_ERROR, "%*s", (int)(str - beg) + 1, "^"); str += len; } else { GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char(\\x%.2x) after", c); GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg), beg); str = se; } } break; } break; case GRN_LOADER_SYMBOL : if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') || ('_' == c)) { GRN_TEXT_PUTC(ctx, loader->last, c); str++; } else { char *v = GRN_TEXT_VALUE(loader->last); switch (*v) { case 'n' : if (GRN_TEXT_LEN(loader->last) == 4 && !memcmp(v, "null", 4)) { loader->last->header.domain = GRN_DB_VOID; GRN_BULK_REWIND(loader->last); } break; case 't' : if (GRN_TEXT_LEN(loader->last) == 4 && !memcmp(v, "true", 4)) { loader->last->header.domain = GRN_DB_BOOL; GRN_BOOL_SET(ctx, loader->last, GRN_TRUE); } break; case 'f' : if (GRN_TEXT_LEN(loader->last) == 5 && !memcmp(v, "false", 5)) { loader->last->header.domain = GRN_DB_BOOL; GRN_BOOL_SET(ctx, loader->last, GRN_FALSE); } break; default : break; } loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END; } break; case GRN_LOADER_NUMBER : switch (c) { case '+' : case '-' : case '.' 
: case 'e' : case 'E' : case '0' : case '1' : case '2' : case '3' : case '4' : case '5' : case '6' : case '7' : case '8' : case '9' : GRN_TEXT_PUTC(ctx, loader->last, c); str++; break; default : { const char *cur, *str = GRN_BULK_HEAD(loader->last); const char *str_end = GRN_BULK_CURR(loader->last); int64_t i = grn_atoll(str, str_end, &cur); if (cur == str_end) { loader->last->header.domain = GRN_DB_INT64; GRN_INT64_SET(ctx, loader->last, i); } else if (cur != str) { uint64_t i = grn_atoull(str, str_end, &cur); if (cur == str_end) { loader->last->header.domain = GRN_DB_UINT64; GRN_UINT64_SET(ctx, loader->last, i); } else if (cur != str) { double d; char *end; grn_obj buf; GRN_TEXT_INIT(&buf, 0); GRN_TEXT_PUT(ctx, &buf, str, GRN_BULK_VSIZE(loader->last)); GRN_TEXT_PUTC(ctx, &buf, '\0'); errno = 0; d = strtod(GRN_TEXT_VALUE(&buf), &end); if (!errno && end + 1 == GRN_BULK_CURR(&buf)) { loader->last->header.domain = GRN_DB_FLOAT; GRN_FLOAT_SET(ctx, loader->last, d); } GRN_OBJ_FIN(ctx, &buf); } } } loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END; break; } break; case GRN_LOADER_STRING : switch (c) { case '\\' : loader->stat = GRN_LOADER_STRING_ESC; str++; break; case '"' : str++; loader->stat = GRN_BULK_VSIZE(&loader->level) ? 
GRN_LOADER_TOKEN : GRN_LOADER_END; /* *(GRN_BULK_CURR(loader->last)) = '\0'; GRN_LOG(ctx, GRN_LOG_ALERT, "read str(%s)", GRN_TEXT_VALUE(loader->last)); */ break; default : if ((len = grn_charlen(ctx, str, se))) { GRN_TEXT_PUT(ctx, loader->last, str, len); str += len; } else { GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char(\\x%.2x) after", c); GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg), beg); str = se; } break; } break; case GRN_LOADER_STRING_ESC : switch (c) { case 'b' : GRN_TEXT_PUTC(ctx, loader->last, '\b'); loader->stat = GRN_LOADER_STRING; break; case 'f' : GRN_TEXT_PUTC(ctx, loader->last, '\f'); loader->stat = GRN_LOADER_STRING; break; case 'n' : GRN_TEXT_PUTC(ctx, loader->last, '\n'); loader->stat = GRN_LOADER_STRING; break; case 'r' : GRN_TEXT_PUTC(ctx, loader->last, '\r'); loader->stat = GRN_LOADER_STRING; break; case 't' : GRN_TEXT_PUTC(ctx, loader->last, '\t'); loader->stat = GRN_LOADER_STRING; break; case 'u' : loader->stat = GRN_LOADER_UNICODE0; break; default : GRN_TEXT_PUTC(ctx, loader->last, c); loader->stat = GRN_LOADER_STRING; break; } str++; break; case GRN_LOADER_UNICODE0 : switch (c) { case '0' : case '1' : case '2' : case '3' : case '4' : case '5' : case '6' : case '7' : case '8' : case '9' : loader->unichar = (c - '0') * 0x1000; break; case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' : loader->unichar = (c - 'a' + 10) * 0x1000; break; case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' : loader->unichar = (c - 'A' + 10) * 0x1000; break; default : ;// todo : error } loader->stat = GRN_LOADER_UNICODE1; str++; break; case GRN_LOADER_UNICODE1 : switch (c) { case '0' : case '1' : case '2' : case '3' : case '4' : case '5' : case '6' : case '7' : case '8' : case '9' : loader->unichar += (c - '0') * 0x100; break; case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' : loader->unichar += (c - 'a' + 10) * 0x100; break; case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' : loader->unichar += 
(c - 'A' + 10) * 0x100; break; default : ;// todo : error } loader->stat = GRN_LOADER_UNICODE2; str++; break; case GRN_LOADER_UNICODE2 : switch (c) { case '0' : case '1' : case '2' : case '3' : case '4' : case '5' : case '6' : case '7' : case '8' : case '9' : loader->unichar += (c - '0') * 0x10; break; case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' : loader->unichar += (c - 'a' + 10) * 0x10; break; case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' : loader->unichar += (c - 'A' + 10) * 0x10; break; default : ;// todo : error } loader->stat = GRN_LOADER_UNICODE3; str++; break; case GRN_LOADER_UNICODE3 : switch (c) { case '0' : case '1' : case '2' : case '3' : case '4' : case '5' : case '6' : case '7' : case '8' : case '9' : loader->unichar += (c - '0'); break; case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' : loader->unichar += (c - 'a' + 10); break; case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' : loader->unichar += (c - 'A' + 10); break; default : ;// todo : error } { uint32_t u = loader->unichar; if (u >= 0xd800 && u <= 0xdbff) { /* High-surrogate code points */ loader->unichar_hi = u; loader->stat = GRN_LOADER_STRING; str++; break; } if (u >= 0xdc00 && u <= 0xdfff) { /* Low-surrogate code points */ u = 0x10000 + (loader->unichar_hi - 0xd800) * 0x400 + u - 0xdc00; } if (u < 0x80) { GRN_TEXT_PUTC(ctx, loader->last, u); } else { if (u < 0x800) { GRN_TEXT_PUTC(ctx, loader->last, (u >> 6) | 0xc0); } else { if (u < 0x10000) { GRN_TEXT_PUTC(ctx, loader->last, (u >> 12) | 0xe0); } else { GRN_TEXT_PUTC(ctx, loader->last, (u >> 18) | 0xf0); GRN_TEXT_PUTC(ctx, loader->last, ((u >> 12) & 0x3f) | 0x80); } GRN_TEXT_PUTC(ctx, loader->last, ((u >> 6) & 0x3f) | 0x80); } GRN_TEXT_PUTC(ctx, loader->last, (u & 0x3f) | 0x80); }
int grn_dat_scan(grn_ctx *ctx, grn_dat *dat, const char *str, unsigned int str_size, grn_dat_scan_hit *scan_hits, unsigned int max_num_scan_hits, const char **str_rest) { if (!grn_dat_open_trie_if_needed(ctx, dat) || !str || !(dat->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) || !scan_hits) { return -1; } grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); if (!trie) { return -1; } if (!max_num_scan_hits || !str_size) { if (str_rest) { *str_rest = str; } return 0; } int num_scan_hits = 0; try { if (dat->obj.header.flags & GRN_OBJ_KEY_NORMALIZE) { grn_str * const normalized_str = grn_str_open( ctx, str, str_size, GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS); if (!normalized_str) { fprintf(stderr, "error: grn_str_open() failed!\n"); return -1; } str = normalized_str->norm; str_size = normalized_str->norm_blen; const short *checks = normalized_str->checks; unsigned int offset = 0; while (str_size) { if (*checks) { grn::dat::UInt32 key_pos; if (trie->lcp_search(str, str_size, &key_pos)) { const grn::dat::Key &key = trie->get_key(key_pos); const grn::dat::UInt32 key_length = key.length(); if ((key_length == str_size) || (checks[key_length])) { unsigned int length = 0; for (grn::dat::UInt32 i = 0; i < key_length; ++i) { if (checks[i] > 0) { length += checks[i]; } } scan_hits[num_scan_hits].id = key.id(); scan_hits[num_scan_hits].offset = offset; scan_hits[num_scan_hits].length = length; offset += length; str += key_length; str_size -= key_length; checks += key_length; if (++num_scan_hits >= max_num_scan_hits) { break; } continue; } } if (*checks > 0) { offset += *checks; } } ++str; --str_size; ++checks; } if (str_rest) { *str_rest = normalized_str->orig + offset; } grn_str_close(ctx, normalized_str); } else { const char * const begin = str; while (str_size) { grn::dat::UInt32 key_pos; if (trie->lcp_search(str, str_size, &key_pos)) { const grn::dat::Key &key = trie->get_key(key_pos); scan_hits[num_scan_hits].id = key.id(); scan_hits[num_scan_hits].offset = str 
- begin; scan_hits[num_scan_hits].length = key.length(); str += key.length(); str_size -= key.length(); if (++num_scan_hits >= max_num_scan_hits) { break; } } else { const int char_length = grn_charlen(ctx, str, str + str_size); if (char_length) { str += char_length; str_size -= char_length; } else { ++str; --str_size; } } } if (str_rest) { *str_rest = str; } } } catch (const grn::dat::Exception &ex) { ERR(grn_dat_translate_error_code(ex.code()), "grn::dat::lcp_search failed"); return -1; } return num_scan_hits; }