/*
 * Scan nstrs strings against the query q.
 *
 * Each strs[i] (str_lens[i] bytes long) is opened as a grn_str and handed to
 * scan_query() together with q->expr; *found and *score are zeroed first and
 * then passed through to scan_query() for every string.
 *
 * flags:
 *   GRN_QUERY_SCAN_NORMALIZE  - open each string with GRN_STR_NORMALIZE.
 *   GRN_QUERY_SCAN_ALLOCCONDS - set internally when q->snip_conds had to be
 *                               allocated by this call; it stays set only for
 *                               the first string.  Passing it while
 *                               q->snip_conds already exists is rejected.
 *
 * Returns GRN_SUCCESS, GRN_INVALID_ARGUMENT for missing/contradictory
 * arguments, GRN_NO_MEMORY_AVAILABLE when grn_str_open() fails, or whatever
 * error alloc_snip_conds()/scan_query() reported.
 */
grn_rc
grn_query_scan(grn_ctx *ctx, grn_query *q, const char **strs, unsigned int *str_lens,
               unsigned int nstrs, int flags, int *found, int *score)
{
  unsigned int i;
  grn_rc rc;
  int str_flags;

  /* str_lens, found and score are dereferenced unconditionally below, so
   * they must be validated together with the other mandatory arguments. */
  if (!q || !strs || !str_lens || !nstrs || !found || !score) {
    return GRN_INVALID_ARGUMENT;
  }
  *found = *score = 0;

  if (!q->snip_conds) {
    if ((rc = alloc_snip_conds(ctx, q))) { return rc; }
    flags |= GRN_QUERY_SCAN_ALLOCCONDS;
  } else if (flags & GRN_QUERY_SCAN_ALLOCCONDS) {
    GRN_LOG(ctx, GRN_LOG_WARNING, "invalid flags specified on grn_query_scan");
    return GRN_INVALID_ARGUMENT;
  }

  /* The open flags do not depend on the individual string. */
  str_flags = GRN_STR_WITH_CHECKS | GRN_STR_REMOVEBLANK;
  if (flags & GRN_QUERY_SCAN_NORMALIZE) { str_flags |= GRN_STR_NORMALIZE; }

  for (i = 0; i < nstrs; i++) {
    /* scan_query() advances this cursor; restart from the head per string. */
    snip_cond *sc = q->snip_conds;
    grn_str *n = grn_str_open(ctx, strs[i], str_lens[i], str_flags);
    if (!n) { return GRN_NO_MEMORY_AVAILABLE; }

    /* i + 1: strings are numbered from 1 when handed to scan_query(). */
    rc = scan_query(ctx, q, n, i + 1, q->expr, &sc, GRN_OP_OR, flags, found, score);
    grn_str_close(ctx, n);
    if (rc) { return rc; }

    /* Only the first successfully scanned string sees ALLOCCONDS. */
    flags &= ~GRN_QUERY_SCAN_ALLOCCONDS;
  }
  return GRN_SUCCESS;
}
/*
 * Data-driven check of grn_str_open() with normalization enabled.
 *
 * The test case supplies "encoding", "input" and "expected".  Both strings
 * are run through convert_encoding() for that encoding; the normalized form
 * of the input must then equal the expected string, and its byte length must
 * equal strlen() of the expected string.
 */
void
test_normalize(gconstpointer data)
{
  grn_encoding enc;
  int open_flags;
  const gchar *source;
  const gchar *wanted;
  const gchar *actual;
  guint actual_bytes;
  grn_str *str;

  enc = gcut_data_get_int(data, "encoding");
  GRN_CTX_SET_ENCODING(&context, enc);

  open_flags = GRN_STR_WITH_CTYPES | GRN_STR_WITH_CHECKS | GRN_STR_NORMALIZE;
  source = convert_encoding(gcut_data_get_string(data, "input"), enc);
  str = grn_str_open(&context, source, strlen(source), open_flags);

  /* Copy the result out before the grn_str is closed. */
  actual = cut_take_strndup(str->norm, str->norm_blen);
  actual_bytes = str->norm_blen;
  grn_test_assert(grn_str_close(&context, str));

  wanted = convert_encoding(gcut_data_get_string(data, "expected"), enc);
  cut_assert_equal_string(wanted, actual);
  cut_assert_equal_int(strlen(wanted), actual_bytes);
}
/*
 * Data-driven check that normalization yields an empty result.
 *
 * The test case supplies "context-encoding" (set on the context), "input",
 * "input-encoding" (used to convert the input) and "input-length".  A
 * negative "input-length" means "use strlen() of the converted input".
 * The opened string must have an empty norm buffer and norm_blen == 0.
 * NOTE(review): presumably the inputs are byte sequences that are invalid
 * in the context encoding - confirm against the data provider.
 */
void
test_normalize_broken(gconstpointer data)
{
  const int open_flags = GRN_STR_WITH_CTYPES | GRN_STR_WITH_CHECKS | GRN_STR_NORMALIZE;
  grn_encoding ctx_enc;
  grn_encoding src_enc;
  const gchar *raw;
  const gchar *converted;
  gint byte_count;
  grn_str *str;

  ctx_enc = gcut_data_get_int(data, "context-encoding");
  GRN_CTX_SET_ENCODING(&context, ctx_enc);

  raw = gcut_data_get_string(data, "input");
  src_enc = gcut_data_get_int(data, "input-encoding");
  byte_count = gcut_data_get_int(data, "input-length");
  converted = convert_encoding(raw, src_enc);
  byte_count = (byte_count < 0) ? (gint)strlen(converted) : byte_count;

  str = grn_str_open(&context, converted, byte_count, open_flags);
  cut_assert_equal_string("", str->norm);
  cut_assert_equal_int(0, str->norm_blen);
  grn_test_assert(grn_str_close(&context, str));
}
int grn_dat_scan(grn_ctx *ctx, grn_dat *dat, const char *str, unsigned int str_size, grn_dat_scan_hit *scan_hits, unsigned int max_num_scan_hits, const char **str_rest) { if (!grn_dat_open_trie_if_needed(ctx, dat) || !str || !(dat->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) || !scan_hits) { return -1; } grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); if (!trie) { return -1; } if (!max_num_scan_hits || !str_size) { if (str_rest) { *str_rest = str; } return 0; } int num_scan_hits = 0; try { if (dat->obj.header.flags & GRN_OBJ_KEY_NORMALIZE) { grn_str * const normalized_str = grn_str_open( ctx, str, str_size, GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS); if (!normalized_str) { fprintf(stderr, "error: grn_str_open() failed!\n"); return -1; } str = normalized_str->norm; str_size = normalized_str->norm_blen; const short *checks = normalized_str->checks; unsigned int offset = 0; while (str_size) { if (*checks) { grn::dat::UInt32 key_pos; if (trie->lcp_search(str, str_size, &key_pos)) { const grn::dat::Key &key = trie->get_key(key_pos); const grn::dat::UInt32 key_length = key.length(); if ((key_length == str_size) || (checks[key_length])) { unsigned int length = 0; for (grn::dat::UInt32 i = 0; i < key_length; ++i) { if (checks[i] > 0) { length += checks[i]; } } scan_hits[num_scan_hits].id = key.id(); scan_hits[num_scan_hits].offset = offset; scan_hits[num_scan_hits].length = length; offset += length; str += key_length; str_size -= key_length; checks += key_length; if (++num_scan_hits >= max_num_scan_hits) { break; } continue; } } if (*checks > 0) { offset += *checks; } } ++str; --str_size; ++checks; } if (str_rest) { *str_rest = normalized_str->orig + offset; } grn_str_close(ctx, normalized_str); } else { const char * const begin = str; while (str_size) { grn::dat::UInt32 key_pos; if (trie->lcp_search(str, str_size, &key_pos)) { const grn::dat::Key &key = trie->get_key(key_pos); scan_hits[num_scan_hits].id = key.id(); scan_hits[num_scan_hits].offset = str 
- begin; scan_hits[num_scan_hits].length = key.length(); str += key.length(); str_size -= key.length(); if (++num_scan_hits >= max_num_scan_hits) { break; } } else { const int char_length = grn_charlen(ctx, str, str + str_size); if (char_length) { str += char_length; str_size -= char_length; } else { ++str; --str_size; } } } if (str_rest) { *str_rest = str; } } } catch (const grn::dat::Exception &ex) { ERR(grn_dat_translate_error_code(ex.code()), "grn::dat::lcp_search failed"); return -1; } return num_scan_hits; }