/*
 * grn_bm_tunedbm: look for the next occurrence of the normalized form of
 * cond->keyword inside the normalized form of `string`, resuming from the
 * byte offset stored in cond->found.
 *
 * Parameters:
 *   ctx    - context handed to the grn_string_get_* accessors.
 *   cond   - search state. Read here: keyword, found, bmBc, shift.
 *            Written here: stopflag. (The compare macros presumably update
 *            further fields when a hit is recorded -- confirm at their
 *            definition.)
 *   string - text to search; its original bytes, checks array, encoding and
 *            normalized bytes (length n) are fetched up front.
 *   flags  - not referenced by the statements visible in this function;
 *            presumably consumed inside GRN_BM_COMPARE -- confirm.
 *
 * Returns nothing. Whenever control reaches the end of the m == 1 branch or
 * the end of the function, cond->stopflag is set to SNIPCOND_STOP.
 *
 * NOTE(review): GRN_BM_COMPARE and GRN_BM_BM_COMPARE are macros defined
 * outside this function. Several locals (i, found, shift, string_original,
 * string_original_length_in_bytes, string_checks, string_encoding) are
 * declared or assigned here but not otherwise used in the visible
 * statements, so the macros presumably reference them by name and
 * presumably return from this function when a hit is accepted. Do not
 * rename or remove those locals without checking the macro definitions.
 *
 * Single-byte keyword (m == 1):
 *   If n > cond->found, memchr() scans the n - cond->found bytes starting at
 *   y + cond->found for keyword_norm[0]. On success `found` becomes the byte
 *   index of the match in the normalized text, `shift` is 1, and
 *   GRN_BM_COMPARE is invoked. If control continues past that point the
 *   stop flag is set and the function returns.
 *
 * Longer keyword:
 *   p starts at y + m + cond->found, so p[-1] is the text byte lined up with
 *   the last keyword byte. cp = x + m points one past the keyword, and ck
 *   caches cp[-2], the byte before the last keyword byte (presumably a quick
 *   pre-check used by GRN_BM_BM_COMPARE -- confirm). bmBc[p[-1]] is added to
 *   p to skip ahead; a zero entry makes the code invoke GRN_BM_BM_COMPARE
 *   and then advance p by `shift`.
 *
 *   The first loop is an unrolled variant that performs up to ten skips per
 *   iteration and tests bmBc for zero on only three of them. It is entered
 *   only when more than 12 * m bytes remain after cond->found and stops once
 *   p passes y + n - 11 * m (see the existing in-line comment about the
 *   value 12). NOTE(review): this margin only keeps p inside the buffer if
 *   every bmBc entry and `shift` are at most m -- confirm where the table is
 *   built.
 *
 *   The second loop handles the remaining tail and compares p against y + n
 *   before every table lookup. In its zero-skip branch delta1 is 0, so the
 *   trailing `p += delta1` does nothing there and the only advance is
 *   `p += shift`.
 */
void grn_bm_tunedbm(grn_ctx *ctx, snip_cond *cond, grn_obj *string, int flags) { register unsigned char *limit, ck; register const unsigned char *p, *cp; register size_t *bmBc, delta1, i; const unsigned char *x; unsigned char *y; size_t shift, found; const char *string_original; unsigned int string_original_length_in_bytes; const short *string_checks; grn_encoding string_encoding; const char *string_norm, *keyword_norm; unsigned int n, m; grn_string_get_original(ctx, string, &string_original, &string_original_length_in_bytes); string_checks = grn_string_get_checks(ctx, string); string_encoding = grn_string_get_encoding(ctx, string); grn_string_get_normalized(ctx, string, &string_norm, &n, NULL); grn_string_get_normalized(ctx, cond->keyword, &keyword_norm, &m, NULL); y = (unsigned char *)string_norm; if (m == 1) { if (n > cond->found) { shift = 1; p = memchr(y + cond->found, keyword_norm[0], n - cond->found); if (p != NULL) { found = p - y; GRN_BM_COMPARE; } } cond->stopflag = SNIPCOND_STOP; return; } x = (unsigned char *)keyword_norm; bmBc = cond->bmBc; shift = cond->shift; /* Restart */ p = y + m + cond->found; cp = x + m; ck = cp[-2]; /* 12 means 1(initial offset) + 10 (in loop) + 1 (shift) */ if (n - cond->found > 12 * m) { limit = y + n - 11 * m; while (p <= limit) { p += bmBc[p[-1]]; if(!(delta1 = bmBc[p[-1]])) { goto check; } p += delta1; p += bmBc[p[-1]]; p += bmBc[p[-1]]; if(!(delta1 = bmBc[p[-1]])) { goto check; } p += delta1; p += bmBc[p[-1]]; p += bmBc[p[-1]]; if(!(delta1 = bmBc[p[-1]])) { goto check; } p += delta1; p += bmBc[p[-1]]; p += bmBc[p[-1]]; continue; check: GRN_BM_BM_COMPARE; p += shift; } } /* limit check + search */ limit = y + n; while(p <= limit) { if (!(delta1 = bmBc[p[-1]])) { GRN_BM_BM_COMPARE; p += shift; } p += delta1; } cond->stopflag = SNIPCOND_STOP; }
int grn_dat_scan(grn_ctx *ctx, grn_dat *dat, const char *str, unsigned int str_size, grn_dat_scan_hit *scan_hits, unsigned int max_num_scan_hits, const char **str_rest) { if (!grn_dat_open_trie_if_needed(ctx, dat) || !str || !(dat->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) || !scan_hits) { return -1; } grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); if (!trie) { return -1; } if (!max_num_scan_hits || !str_size) { if (str_rest) { *str_rest = str; } return 0; } int num_scan_hits = 0; try { if (dat->obj.header.flags & GRN_OBJ_KEY_NORMALIZE) { grn_obj *normalizer = GRN_NORMALIZER_AUTO; int flags = GRN_STRING_WITH_CHECKS; grn_obj * const normalized_string = grn_string_open(ctx, str, str_size, normalizer, flags); if (!normalized_string) { fprintf(stderr, "error: grn_string_open() failed!\n"); return -1; } grn_string_get_normalized(ctx, normalized_string, &str, &str_size, NULL); const short *checks = grn_string_get_checks(ctx, normalized_string); unsigned int offset = 0; while (str_size) { if (*checks) { grn::dat::UInt32 key_pos; if (trie->lcp_search(str, str_size, &key_pos)) { const grn::dat::Key &key = trie->get_key(key_pos); const grn::dat::UInt32 key_length = key.length(); if ((key_length == str_size) || (checks[key_length])) { unsigned int length = 0; for (grn::dat::UInt32 i = 0; i < key_length; ++i) { if (checks[i] > 0) { length += checks[i]; } } scan_hits[num_scan_hits].id = key.id(); scan_hits[num_scan_hits].offset = offset; scan_hits[num_scan_hits].length = length; offset += length; str += key_length; str_size -= key_length; checks += key_length; if (++num_scan_hits >= max_num_scan_hits) { break; } continue; } } if (*checks > 0) { offset += *checks; } } ++str; --str_size; ++checks; } if (str_rest) { grn_string_get_original(ctx, normalized_string, str_rest, NULL); *str_rest += offset; } grn_obj_close(ctx, normalized_string); } else { const char * const begin = str; while (str_size) { grn::dat::UInt32 key_pos; if (trie->lcp_search(str, 
str_size, &key_pos)) { const grn::dat::Key &key = trie->get_key(key_pos); scan_hits[num_scan_hits].id = key.id(); scan_hits[num_scan_hits].offset = str - begin; scan_hits[num_scan_hits].length = key.length(); str += key.length(); str_size -= key.length(); if (++num_scan_hits >= max_num_scan_hits) { break; } } else { const int char_length = grn_charlen(ctx, str, str + str_size); if (char_length) { str += char_length; str_size -= char_length; } else { ++str; --str_size; } } } if (str_rest) { *str_rest = str; } } } catch (const grn::dat::Exception &ex) { ERR(grn_dat_translate_error_code(ex.code()), "grn::dat::lcp_search failed"); return -1; } return num_scan_hits; }