/* Initialize a PolySearcher that spans every Searcher in `searchers`.
 *
 * self      - object to initialize; also the return value.
 * schema    - passed to Searcher_init(); every element of `searchers` must
 *             report a Schema whose class is identical to this one's.
 * searchers - array of Searcher objects; an additional reference is taken
 *             via INCREF and stored in the ivars.
 *
 * On success, ivars->starts holds, for each searcher, the sum of
 * Searcher_Doc_Max() over all preceding searchers, and ivars->doc_max holds
 * the sum over all of them.
 *
 * Raises ERR via THROW when a searcher's schema class differs from that of
 * `schema`.
 */
PolySearcher*
PolySearcher_init(PolySearcher *self, Schema *schema, VArray *searchers) {
    const uint32_t num_searchers = VA_Get_Size(searchers);

    Searcher_init((Searcher*)self, schema);
    PolySearcherIVARS *const ivars = PolySearcher_IVARS(self);
    ivars->searchers = (VArray*)INCREF(searchers);
    ivars->starts    = NULL; // Safe cleanup.

    // Pass 1: validate every element before allocating anything.  The raw
    // offsets buffer has no owner until it is handed to I32Arr_new_steal(),
    // so a THROW while it is live would abandon it (THROW is assumed not to
    // return normally -- confirm against the runtime).
    Class *orig_class = Schema_Get_Class(schema);
    for (uint32_t i = 0; i < num_searchers; i++) {
        Searcher *searcher
            = (Searcher*)CERTIFY(VA_Fetch(searchers, i), SEARCHER);
        Schema *candidate       = Searcher_Get_Schema(searcher);
        Class  *candidate_class = Schema_Get_Class(candidate);

        // Confirm that searchers all use the same schema.
        if (orig_class != candidate_class) {
            THROW(ERR, "Conflicting schemas: '%o', '%o'",
                  Schema_Get_Class_Name(schema),
                  Schema_Get_Class_Name(candidate));
        }
    }

    // Pass 2: derive doc_max and relative start offsets.  Elements were
    // already type-checked above, so a plain cast suffices here.
    int32_t *starts_array
        = (int32_t*)MALLOCATE(num_searchers * sizeof(int32_t));
    int32_t doc_max = 0;
    for (uint32_t i = 0; i < num_searchers; i++) {
        Searcher *searcher = (Searcher*)VA_Fetch(searchers, i);
        starts_array[i] = doc_max;
        doc_max += Searcher_Doc_Max(searcher);
    }

    ivars->doc_max = doc_max;
    // NOTE(review): the "_steal" suffix suggests the I32Array takes
    // ownership of starts_array -- confirm; no free is issued here.
    ivars->starts = I32Arr_new_steal(starts_array, num_searchers);

    return self;
}
/* Initialize a PhraseCompiler for the PhraseQuery `parent`.
 *
 * self     - object to initialize; also the return value.
 * parent   - the PhraseQuery being compiled; its `field` and `terms` ivars
 *            are read.
 * searcher - supplies the Schema, Doc_Max and per-term Doc_Freq.
 * boost    - forwarded to Compiler_init().
 *
 * Sets ivars->idf to the sum of Sim_IDF() over all terms of the phrase and
 * ivars->raw_weight to idf * boost.
 */
PhraseCompiler*
PhraseCompiler_init(PhraseCompiler *self, PhraseQuery *parent,
                    Searcher *searcher, float boost) {
    PhraseCompilerIVARS *const ivars       = PhraseCompiler_IVARS(self);
    PhraseQueryIVARS    *const query_ivars = PhraseQuery_IVARS(parent);
    Schema *schema = Searcher_Get_Schema(searcher);

    // Look up the Similarity by the query's field first; if that yields
    // nothing, ask the Schema itself.
    Similarity *sim = Schema_Fetch_Sim(schema, query_ivars->field);
    Vector *phrase_terms = query_ivars->terms;
    if (!sim) {
        sim = Schema_Get_Similarity(schema);
    }

    // Superclass initialization.
    Compiler_init((Compiler*)self, (Query*)parent, searcher, sim, boost);

    // The phrase's IDF is the sum of its terms' IDFs.
    ivars->idf = 0;
    const uint32_t num_terms = Vec_Get_Size(phrase_terms);
    uint32_t tick = 0;
    while (tick < num_terms) {
        Obj     *term     = Vec_Fetch(phrase_terms, tick);
        int32_t  doc_max  = Searcher_Doc_Max(searcher);
        int32_t  doc_freq = Searcher_Doc_Freq(searcher, query_ivars->field,
                                              term);
        ivars->idf += Sim_IDF(sim, doc_freq, doc_max);
        tick++;
    }

    // Raw weight combines IDF with ivars->boost (presumably stored by
    // Compiler_init() from the `boost` argument -- confirm).
    ivars->raw_weight = ivars->idf * ivars->boost;

    return self;
}
/* Initialize a PhraseCompiler for the PhraseQuery `parent`.
 *
 * self     - object to initialize; also the return value.
 * parent   - the PhraseQuery being compiled; its `field` and `terms`
 *            members are read.
 * searcher - supplies the Schema, Doc_Max and per-term Doc_Freq.
 * boost    - forwarded to Compiler_init().
 *
 * Sets self->idf to the sum of Sim_IDF() over all terms of the phrase,
 * sets self->raw_weight to idf * boost, then calls
 * PhraseCompiler_Normalize() on the result.
 */
PhraseCompiler*
PhraseCompiler_init(PhraseCompiler *self, PhraseQuery *parent,
                    Searcher *searcher, float boost) {
    Schema     *schema       = Searcher_Get_Schema(searcher);
    Similarity *sim          = Schema_Fetch_Sim(schema, parent->field);
    VArray     *phrase_terms = parent->terms;
    uint32_t    tick;
    uint32_t    num_terms;

    // Look up the Similarity by the query's field first; if that yields
    // nothing, ask the Schema itself.
    if (!sim) {
        sim = Schema_Get_Similarity(schema);
    }

    // Superclass initialization.
    Compiler_init((Compiler*)self, (Query*)parent, searcher, sim, boost);

    // The phrase's IDF is the sum of its terms' IDFs.
    self->idf = 0;
    num_terms = VA_Get_Size(phrase_terms);
    tick      = 0;
    while (tick < num_terms) {
        Obj     *term     = VA_Fetch(phrase_terms, tick);
        int32_t  doc_max  = Searcher_Doc_Max(searcher);
        int32_t  doc_freq = Searcher_Doc_Freq(searcher, parent->field, term);
        self->idf += Sim_IDF(sim, doc_freq, doc_max);
        tick++;
    }

    // Raw weight combines IDF with self->boost (presumably stored by
    // Compiler_init() from the `boost` argument -- confirm).
    self->raw_weight = self->idf * self->boost;

    // Make final preparations.
    PhraseCompiler_Normalize(self);

    return self;
}
/* Initialize a TermCompiler for the TermQuery `parent`.
 *
 * self     - object to initialize; also the return value.
 * parent   - the query being compiled; it is cast to TermQuery and its
 *            `field` and `term` ivars are read.
 * searcher - supplies the Schema, Doc_Max and Doc_Freq.
 * boost    - forwarded to Compiler_init().
 *
 * Zeroes normalized_weight and query_norm_factor, stores the term's IDF,
 * and sets raw_weight to idf * boost.
 */
TermCompiler*
TermCompiler_init(TermCompiler *self, Query *parent, Searcher *searcher,
                  float boost) {
    TermCompilerIVARS *const ivars = TermCompiler_IVARS(self);
    TermQueryIVARS *const query_ivars = TermQuery_IVARS((TermQuery*)parent);
    Schema *schema = Searcher_Get_Schema(searcher);

    // Look up the Similarity by the query's field first; if that yields
    // nothing, ask the Schema itself.
    Similarity *sim = Schema_Fetch_Sim(schema, query_ivars->field);
    if (!sim) {
        sim = Schema_Get_Similarity(schema);
    }

    // Superclass initialization, then reset the normalization state.
    Compiler_init((Compiler*)self, parent, searcher, sim, boost);
    ivars->query_norm_factor = 0.0f;
    ivars->normalized_weight = 0.0f;

    // Inverse document frequency for this term.
    int32_t  total_docs = Searcher_Doc_Max(searcher);
    uint32_t term_docs  = Searcher_Doc_Freq(searcher, query_ivars->field,
                                            query_ivars->term);
    ivars->idf = Sim_IDF(sim, (int32_t)term_docs, total_docs);

    /* A document's score is roughly
     *
     *     (tf_d * idf_t / norm_d) * (tf_q * idf_t / norm_q)
     *
     * Only the first idf_t and the user-supplied boost are applied here.
     * The second factor is folded in by the call to Normalize().  tf_d and
     * norm_d vary per document, so only the Matcher can contribute them.
     */
    ivars->raw_weight = ivars->idf * ivars->boost;

    return self;
}