/* Initialize a PhraseCompiler for `parent`, weighted against `searcher`.
 *
 * Looks up the Similarity for the query's field (falling back to the
 * schema-wide Similarity), runs the base Compiler initializer, sums the IDF
 * of every term in the phrase into `self->idf`, derives `self->raw_weight`
 * as idf * boost, and finally calls PhraseCompiler_Normalize().
 *
 * @param self     Object to initialize.
 * @param parent   The PhraseQuery being compiled; supplies `field` and `terms`.
 * @param searcher Source of the schema and of doc_max/doc_freq statistics.
 * @param boost    Boost forwarded to Compiler_init().
 * @return `self`.
 */
PhraseCompiler*
PhraseCompiler_init(PhraseCompiler *self, PhraseQuery *parent,
                    Searcher *searcher, float boost) {
    Schema     *schema = Searcher_Get_Schema(searcher);
    Similarity *sim    = Schema_Fetch_Sim(schema, parent->field);
    VArray     *terms  = parent->terms;
    int32_t     doc_max;
    uint32_t    i, max;

    // Try harder to find a Similarity if necessary.
    if (!sim) { sim = Schema_Get_Similarity(schema); }

    // Init.
    Compiler_init((Compiler*)self, (Query*)parent, searcher, sim, boost);

    // Store IDF for the phrase.  doc_max does not depend on the term, so
    // ask the searcher for it once instead of once per term.
    self->idf = 0;
    doc_max = Searcher_Doc_Max(searcher);
    for (i = 0, max = VA_Get_Size(terms); i < max; i++) {
        Obj     *term     = VA_Fetch(terms, i);
        int32_t  doc_freq = Searcher_Doc_Freq(searcher, parent->field, term);
        self->idf += Sim_IDF(sim, doc_freq, doc_max);
    }

    // Calculate raw weight.
    self->raw_weight = self->idf * self->boost;

    // Make final preparations.
    PhraseCompiler_Normalize(self);

    return self;
}
/* Initialize a PolySearcher that aggregates several sub-searchers.
 *
 * Every element of `searchers` must be a Searcher whose schema has the same
 * class as `schema`; otherwise an error is thrown.  On success, the object
 * records the combined doc_max of all sub-searchers and, for each one, the
 * running doc-id offset at which its documents start.
 *
 * @param self      Object to initialize.
 * @param schema    Schema shared by all sub-searchers.
 * @param searchers Array of Searcher objects; a reference is retained.
 * @return `self`.
 */
PolySearcher*
PolySearcher_init(PolySearcher *self, Schema *schema, VArray *searchers) {
    const uint32_t num_searchers = VA_Get_Size(searchers);
    int32_t doc_max = 0;

    Searcher_init((Searcher*)self, schema);
    PolySearcherIVARS *const ivars = PolySearcher_IVARS(self);
    ivars->searchers = (VArray*)INCREF(searchers);
    ivars->starts    = NULL; // Safe cleanup.

    // Validate all sub-searchers before allocating anything.  CERTIFY and
    // THROW leave this function early, and a raw buffer allocated beforehand
    // would have no owner to release it.
    for (uint32_t i = 0; i < num_searchers; i++) {
        Searcher *searcher
            = (Searcher*)CERTIFY(VA_Fetch(searchers, i), SEARCHER);
        Schema *candidate       = Searcher_Get_Schema(searcher);
        Class  *orig_class      = Schema_Get_Class(schema);
        Class  *candidate_class = Schema_Get_Class(candidate);

        // Confirm that searchers all use the same schema.
        if (orig_class != candidate_class) {
            THROW(ERR, "Conflicting schemas: '%o', '%o'",
                  Schema_Get_Class_Name(schema),
                  Schema_Get_Class_Name(candidate));
        }
    }

    // Derive doc_max and relative start offsets.  Elements were certified
    // above, so a plain cast suffices here.
    // NOTE(review): assumes Searcher_Doc_Max() does not throw -- confirm.
    int32_t *starts_array
        = (int32_t*)MALLOCATE(num_searchers * sizeof(int32_t));
    for (uint32_t i = 0; i < num_searchers; i++) {
        Searcher *searcher = (Searcher*)VA_Fetch(searchers, i);
        starts_array[i] = doc_max;
        doc_max += Searcher_Doc_Max(searcher);
    }

    ivars->doc_max = doc_max;
    // Ownership of starts_array passes to the I32Array.
    ivars->starts  = I32Arr_new_steal(starts_array, num_searchers);

    return self;
}
/* Initialize a PhraseCompiler for `parent`, weighted against `searcher`.
 *
 * Looks up the Similarity for the query's field (falling back to the
 * schema-wide Similarity), runs the base Compiler initializer, sums the IDF
 * of every term in the phrase into `idf`, and derives `raw_weight` as
 * idf * boost.
 *
 * @param self     Object to initialize.
 * @param parent   The PhraseQuery being compiled; supplies `field` and `terms`.
 * @param searcher Source of the schema and of doc_max/doc_freq statistics.
 * @param boost    Boost forwarded to Compiler_init().
 * @return `self`.
 */
PhraseCompiler*
PhraseCompiler_init(PhraseCompiler *self, PhraseQuery *parent,
                    Searcher *searcher, float boost) {
    PhraseCompilerIVARS *const ivars        = PhraseCompiler_IVARS(self);
    PhraseQueryIVARS    *const parent_ivars = PhraseQuery_IVARS(parent);
    Schema     *schema = Searcher_Get_Schema(searcher);
    Similarity *sim    = Schema_Fetch_Sim(schema, parent_ivars->field);
    Vector     *terms  = parent_ivars->terms;

    // Try harder to find a Similarity if necessary.
    if (!sim) { sim = Schema_Get_Similarity(schema); }

    // Init.
    Compiler_init((Compiler*)self, (Query*)parent, searcher, sim, boost);

    // Store IDF for the phrase.  doc_max does not depend on the term, so
    // ask the searcher for it once instead of once per term.
    ivars->idf = 0;
    const int32_t doc_max = Searcher_Doc_Max(searcher);
    for (uint32_t i = 0, max = Vec_Get_Size(terms); i < max; i++) {
        Obj     *term     = Vec_Fetch(terms, i);
        int32_t  doc_freq
            = Searcher_Doc_Freq(searcher, parent_ivars->field, term);
        ivars->idf += Sim_IDF(sim, doc_freq, doc_max);
    }

    // Calculate raw weight.
    ivars->raw_weight = ivars->idf * ivars->boost;

    return self;
}
/* Run the QueryParser syntax test suite against the index in `index`.
 *
 * Two NULL-terminated tables of test-case factories are walked.  For each
 * "leaf" case, the parser's Tree() and Expand_Leaf() output are compared to
 * the expected queries and the hit count of the parsed query is checked.
 * For each "syntax" case, only Tree() output and hit count are checked.
 */
void
TestQPSyntax_run_tests(Folder *index) {
    u32_t        i;
    TestBatch   *batch    = Test_new_batch("TestQueryParserSyntax", 48, NULL);
    Searcher    *searcher = Searcher_new((Obj*)index);
    QueryParser *qparser
        = QParser_new(Searcher_Get_Schema(searcher), NULL, NULL, NULL);

    QParser_Set_Heed_Colons(qparser, true);

    PLAN(batch);

    /* Leaf tests: tree, leaf expansion, and hit count. */
    i = 0;
    while (leaf_test_funcs[i] != NULL) {
        kino_TestQPSyntax_test_t make_case = leaf_test_funcs[i];
        TestQueryParser *tc = make_case(i);
        Query *got_tree     = QParser_Tree(qparser, tc->query_string);
        Query *got_expanded = QParser_Expand_Leaf(qparser, tc->tree);
        Query *got_parsed   = QParser_Parse(qparser, tc->query_string);
        Hits  *got_hits
            = Searcher_Hits(searcher, (Obj*)got_parsed, 0, 10, NULL);

        ASSERT_TRUE(batch, Query_Equals(got_tree, (Obj*)tc->tree),
                    "tree() %s", tc->query_string->ptr);
        ASSERT_TRUE(batch, Query_Equals(got_expanded, (Obj*)tc->expanded),
                    "expand_leaf() %s", tc->query_string->ptr);
        ASSERT_INT_EQ(batch, Hits_Total_Hits(got_hits), tc->num_hits,
                      "hits: %s", tc->query_string->ptr);

        DECREF(got_hits);
        DECREF(got_parsed);
        DECREF(got_expanded);
        DECREF(got_tree);
        DECREF(tc);

        i++;
    }

    /* Syntax tests: tree and hit count only. */
    i = 0;
    while (syntax_test_funcs[i] != NULL) {
        kino_TestQPSyntax_test_t make_case = syntax_test_funcs[i];
        TestQueryParser *tc = make_case(i);
        Query *got_tree   = QParser_Tree(qparser, tc->query_string);
        Query *got_parsed = QParser_Parse(qparser, tc->query_string);
        Hits  *got_hits
            = Searcher_Hits(searcher, (Obj*)got_parsed, 0, 10, NULL);

        ASSERT_TRUE(batch, Query_Equals(got_tree, (Obj*)tc->tree),
                    "tree() %s", tc->query_string->ptr);
        ASSERT_INT_EQ(batch, Hits_Total_Hits(got_hits), tc->num_hits,
                      "hits: %s", tc->query_string->ptr);

        DECREF(got_hits);
        DECREF(got_parsed);
        DECREF(got_tree);
        DECREF(tc);

        i++;
    }

    /* Tear down. */
    batch->destroy(batch);
    DECREF(searcher);
    DECREF(qparser);
}
/* Base initializer for Compiler objects.
 *
 * Initializes the Query portion of `self` with `boost`, then stores new
 * references to `parent` and to the Similarity.  When `sim` is NULL, the
 * Similarity is taken from the searcher's schema instead.
 *
 * @param self     Object to initialize.
 * @param parent   Query this Compiler was derived from.
 * @param searcher Consulted for a schema only when `sim` is NULL.
 * @param sim      Similarity to use, or NULL to use the schema's.
 * @param boost    Boost passed through to Query_init().
 * @return `self`.
 */
Compiler*
Compiler_init(Compiler *self, Query *parent, Searcher *searcher,
              Similarity *sim, float boost) {
    Query_init((Query*)self, boost);

    // No Similarity supplied: use the one attached to the schema.
    if (sim == NULL) {
        sim = Schema_Get_Similarity(Searcher_Get_Schema(searcher));
    }

    self->parent = (Query*)INCREF(parent);
    self->sim    = (Similarity*)INCREF(sim);

    ABSTRACT_CLASS_CHECK(self, COMPILER);

    return self;
}
/* Initialize a TermCompiler for the TermQuery `parent`.
 *
 * Chooses a Similarity (field-specific if available, otherwise the schema's),
 * runs the base Compiler initializer, zeroes the normalization members, and
 * computes `idf` and `raw_weight` from the searcher's statistics.
 *
 * @param self     Object to initialize.
 * @param parent   The query being compiled; treated as a TermQuery.
 * @param searcher Source of the schema and of doc_max/doc_freq statistics.
 * @param boost    Boost forwarded to Compiler_init().
 * @return `self`.
 */
TermCompiler*
TermCompiler_init(TermCompiler *self, Query *parent, Searcher *searcher,
                  float boost) {
    TermCompilerIVARS *const ivars = TermCompiler_IVARS(self);
    TermQueryIVARS *const query_ivars = TermQuery_IVARS((TermQuery*)parent);
    Schema *schema = Searcher_Get_Schema(searcher);

    // Prefer the Similarity registered for the field; otherwise fall back
    // to the schema-wide one.
    Similarity *sim = Schema_Fetch_Sim(schema, query_ivars->field);
    if (sim == NULL) {
        sim = Schema_Get_Similarity(schema);
    }

    // Base-class setup, then clear the members filled in by normalization.
    Compiler_init((Compiler*)self, parent, searcher, sim, boost);
    ivars->normalized_weight = 0.0f;
    ivars->query_norm_factor = 0.0f;

    // Gather collection statistics and turn them into an IDF.
    const int32_t  total_docs = Searcher_Doc_Max(searcher);
    const uint32_t matching_docs
        = Searcher_Doc_Freq(searcher, query_ivars->field, query_ivars->term);
    ivars->idf = Sim_IDF(sim, (int32_t)matching_docs, total_docs);

    /* A document's score is roughly
     *
     *    (tf_d * idf_t / norm_d) * (tf_q * idf_t / norm_q)
     *
     * Only the first IDF and the user-supplied boost are applied here.
     * The second clause is contributed later by Normalize().  tf_d and
     * norm_d vary per document, so only the Matcher can supply them.
     */
    ivars->raw_weight = ivars->idf * ivars->boost;

    return self;
}
void TestQPLogic_run_tests() { u32_t i; TestBatch *batch = Test_new_batch("TestQueryParserLogic", 178, NULL); Folder *folder = S_create_index(); Searcher *searcher = Searcher_new((Obj*)folder); QueryParser *or_parser = QParser_new(Searcher_Get_Schema(searcher), NULL, NULL, NULL); static ZombieCharBuf AND = ZCB_LITERAL("AND"); QueryParser *and_parser = QParser_new(Searcher_Get_Schema(searcher), NULL, (CharBuf*)&AND, NULL); QParser_Set_Heed_Colons(or_parser, true); QParser_Set_Heed_Colons(and_parser, true); PLAN(batch); /* Run logical tests with default boolop of OR. */ for (i = 0; logical_test_funcs[i] != NULL; i++) { kino_TestQPLogic_logical_test_t test_func = logical_test_funcs[i]; TestQueryParser *test_case = test_func(BOOLOP_OR); Query *tree = QParser_Tree(or_parser, test_case->query_string); Query *parsed = QParser_Parse(or_parser, test_case->query_string); Hits *hits = Searcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL); ASSERT_TRUE(batch, Query_Equals(tree, (Obj*)test_case->tree), "tree() OR %s", test_case->query_string->ptr); ASSERT_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits, "hits: OR %s", test_case->query_string->ptr); DECREF(hits); DECREF(parsed); DECREF(tree); DECREF(test_case); } /* Run logical tests with default boolop of AND. */ for (i = 0; logical_test_funcs[i] != NULL; i++) { kino_TestQPLogic_logical_test_t test_func = logical_test_funcs[i]; TestQueryParser *test_case = test_func(BOOLOP_AND); Query *tree = QParser_Tree(and_parser, test_case->query_string); Query *parsed = QParser_Parse(and_parser, test_case->query_string); Hits *hits = Searcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL); ASSERT_TRUE(batch, Query_Equals(tree, (Obj*)test_case->tree), "tree() AND %s", test_case->query_string->ptr); ASSERT_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits, "hits: AND %s", test_case->query_string->ptr); DECREF(hits); DECREF(parsed); DECREF(tree); DECREF(test_case); } /* Run tests for QParser_Prune(). 
*/ for (i = 0; prune_test_funcs[i] != NULL; i++) { kino_TestQPLogic_prune_test_t test_func = prune_test_funcs[i]; TestQueryParser *test_case = test_func(); CharBuf *qstring = test_case->tree ? Obj_To_String(test_case->tree) : CB_new_from_trusted_utf8("(NULL)", 6); Query *tree = test_case->tree; Query *wanted = test_case->expanded; Query *pruned = QParser_Prune(or_parser, tree); Query *expanded; Hits *hits; ASSERT_TRUE(batch, Query_Equals(pruned, (Obj*)wanted), "prune() %s", qstring->ptr); expanded = QParser_Expand(or_parser, pruned); hits = Searcher_Hits(searcher, (Obj*)expanded, 0, 10, NULL); ASSERT_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits, "hits: %s", qstring->ptr); DECREF(hits); DECREF(expanded); DECREF(pruned); DECREF(qstring); DECREF(test_case); } DECREF(and_parser); DECREF(or_parser); DECREF(searcher); DECREF(folder); batch->destroy(batch); }