static grn_rc sequential_fuzzy_search(grn_ctx *ctx, grn_obj *table, grn_obj *column, grn_obj *query, uint32_t max_distance, uint32_t prefix_match_size, uint32_t max_expansion, int flags, grn_obj *res, grn_operator op) { grn_table_cursor *tc; char *sx = GRN_TEXT_VALUE(query); char *ex = GRN_BULK_CURR(query); if (op == GRN_OP_AND) { tc = grn_table_cursor_open(ctx, res, NULL, 0, NULL, 0, 0, -1, GRN_CURSOR_BY_ID); } else { tc = grn_table_cursor_open(ctx, table, NULL, 0, NULL, 0, 0, -1, GRN_CURSOR_BY_ID); } if (tc) { grn_id id; grn_obj value; score_heap *heap; int i, n; GRN_TEXT_INIT(&value, 0); heap = score_heap_open(ctx, SCORE_HEAP_SIZE); if (!heap) { grn_table_cursor_close(ctx, tc); grn_obj_unlink(ctx, &value); return GRN_NO_MEMORY_AVAILABLE; } while ((id = grn_table_cursor_next(ctx, tc))) { unsigned int distance = 0; grn_obj *domain; GRN_BULK_REWIND(&value); grn_obj_get_value(ctx, column, id, &value); domain = grn_ctx_at(ctx, ((&value))->header.domain); if ((&(value))->header.type == GRN_VECTOR) { n = grn_vector_size(ctx, &value); for (i = 0; i < n; i++) { unsigned int length; const char *vector_value = NULL; length = grn_vector_get_element(ctx, &value, i, &vector_value, NULL, NULL); if (!prefix_match_size || (prefix_match_size > 0 && length >= prefix_match_size && !memcmp(sx, vector_value, prefix_match_size))) { distance = calc_edit_distance(ctx, sx, ex, (char *)vector_value, (char *)vector_value + length, flags); if (distance <= max_distance) { score_heap_push(ctx, heap, id, distance); break; } } } } else if ((&(value))->header.type == GRN_UVECTOR && grn_obj_is_table(ctx, domain)) { n = grn_vector_size(ctx, &value); for (i = 0; i < n; i++) { grn_id rid; char key_name[GRN_TABLE_MAX_KEY_SIZE]; int key_length; rid = grn_uvector_get_element(ctx, &value, i, NULL); key_length = grn_table_get_key(ctx, domain, rid, key_name, GRN_TABLE_MAX_KEY_SIZE); if (!prefix_match_size || (prefix_match_size > 0 && key_length >= prefix_match_size && !memcmp(sx, key_name, 
prefix_match_size))) { distance = calc_edit_distance(ctx, sx, ex, key_name, key_name + key_length, flags); if (distance <= max_distance) { score_heap_push(ctx, heap, id, distance); break; } } } } else { if (grn_obj_is_reference_column(ctx, column)) { grn_id rid; char key_name[GRN_TABLE_MAX_KEY_SIZE]; int key_length; rid = GRN_RECORD_VALUE(&value); key_length = grn_table_get_key(ctx, domain, rid, key_name, GRN_TABLE_MAX_KEY_SIZE); if (!prefix_match_size || (prefix_match_size > 0 && key_length >= prefix_match_size && !memcmp(sx, key_name, prefix_match_size))) { distance = calc_edit_distance(ctx, sx, ex, key_name, key_name + key_length, flags); if (distance <= max_distance) { score_heap_push(ctx, heap, id, distance); } } } else { if (!prefix_match_size || (prefix_match_size > 0 && GRN_TEXT_LEN(&value) >= prefix_match_size && !memcmp(sx, GRN_TEXT_VALUE(&value), prefix_match_size))) { distance = calc_edit_distance(ctx, sx, ex, GRN_TEXT_VALUE(&value), GRN_BULK_CURR(&value), flags); if (distance <= max_distance) { score_heap_push(ctx, heap, id, distance); } } } } grn_obj_unlink(ctx, domain); } grn_table_cursor_close(ctx, tc); grn_obj_unlink(ctx, &value); for (i = 0; i < heap->n_entries; i++) { if (max_expansion > 0 && i >= max_expansion) { break; } { grn_posting posting; posting.rid = heap->nodes[i].id; posting.sid = 1; posting.pos = 0; posting.weight = max_distance - heap->nodes[i].score; grn_ii_posting_add(ctx, &posting, (grn_hash *)res, op); } } grn_ii_resolve_sel_and(ctx, (grn_hash *)res, op); score_heap_close(ctx, heap); } return GRN_SUCCESS; }
/*
 * mruby method: index_cursor.select(result_set, options = nil)
 *
 * Drains postings from the index cursor wrapped by `self` and adds them to
 * result_set with GRN_OP_OR.
 *
 * Recognised option keys:
 *   "expression" - when given, a posting is kept only if the expression
 *                  evaluates to a true value with its variable 0 set to the
 *                  posting's record id.
 *   "offset"     - number of matching postings to skip before any is added
 *                  (default 0).
 *   "limit"      - maximum number of postings added (default 10); a value
 *                  <= 0 returns immediately with 0.
 *
 * Returns the number of postings that matched as a Fixnum.  Postings
 * skipped because of "offset" are counted; iteration stops as soon as
 * "limit" postings have been added.
 */
static mrb_value
mrb_grn_index_cursor_select(mrb_state *mrb, mrb_value self)
{
  grn_ctx *ctx = (grn_ctx *)mrb->ud;
  mrb_value mrb_result_set;
  /*
   * The options hash is optional ("|H"); mrb_get_args() does not touch the
   * variable when the argument is omitted, so it needs a defined default.
   */
  mrb_value mrb_options = mrb_nil_value();
  grn_obj *index_cursor;
  grn_obj *expr = NULL;
  grn_obj *expr_variable = NULL;
  int offset = 0;
  int limit = 10;
  int n_matched_records = 0;
  grn_hash *result_set;
  grn_posting *posting;
  grn_id term_id;
  grn_operator op = GRN_OP_OR;

  mrb_get_args(mrb, "o|H", &mrb_result_set, &mrb_options);

  index_cursor = DATA_PTR(self);
  result_set = DATA_PTR(mrb_result_set);

  if (!mrb_nil_p(mrb_options)) {
    mrb_value mrb_expr;
    mrb_value mrb_offset;
    mrb_value mrb_limit;

    mrb_expr = grn_mrb_options_get_lit(mrb, mrb_options, "expression");
    if (!mrb_nil_p(mrb_expr)) {
      expr = DATA_PTR(mrb_expr);
      expr_variable = grn_expr_get_var_by_offset(ctx, expr, 0);
    }

    mrb_offset = grn_mrb_options_get_lit(mrb, mrb_options, "offset");
    if (!mrb_nil_p(mrb_offset)) {
      offset = mrb_fixnum(mrb_offset);
    }

    mrb_limit = grn_mrb_options_get_lit(mrb, mrb_options, "limit");
    if (!mrb_nil_p(mrb_limit)) {
      limit = mrb_fixnum(mrb_limit);
    }
  }

  if (limit <= 0) {
    return mrb_fixnum_value(n_matched_records);
  }

  while ((posting = grn_index_cursor_next(ctx, index_cursor, &term_id))) {
    if (expr) {
      grn_bool matched_raw;
      grn_obj *matched;

      GRN_RECORD_SET(ctx, expr_variable, posting->rid);
      matched = grn_expr_exec(ctx, expr, 0);
      if (!matched) {
        /* Evaluation produced no result; let the ctx check handle it. */
        grn_mrb_ctx_check(mrb);
        continue;
      }
      GRN_TRUEP(ctx, matched, matched_raw);
      if (!matched_raw) {
        continue;
      }
    }

    n_matched_records++;

    /* Matching postings inside the offset window are counted, not added. */
    if (offset > 0) {
      offset--;
      continue;
    }

    grn_ii_posting_add(ctx, (grn_ii_posting *)posting, result_set, op);
    limit--;
    if (limit == 0) {
      break;
    }
  }
  grn_ii_resolve_sel_and(ctx, result_set, op);

  return mrb_fixnum_value(n_matched_records);
}