// Compare this PhraseQuery with `other`.
//
// Returns true when `other` is the very same object, or when it is a
// PhraseQuery with the same boost, an equal (or equally absent) field, and
// an equal terms vector.  Returns false otherwise.
bool
PhraseQuery_Equals_IMP(PhraseQuery *self, Obj *other) {
    // Identity implies equality.
    if (self == (PhraseQuery*)other) { return true; }

    // Only another PhraseQuery can be equal.
    if (!Obj_is_a(other, PHRASEQUERY)) { return false; }

    PhraseQueryIVARS *const mine   = PhraseQuery_IVARS(self);
    PhraseQueryIVARS *const theirs = PhraseQuery_IVARS((PhraseQuery*)other);

    if (mine->boost != theirs->boost) { return false; }

    if (mine->field == NULL || theirs->field == NULL) {
        // At least one side has no field: equal only if both have none.
        if (mine->field != theirs->field) { return false; }
    }
    else if (!Str_Equals(mine->field, (Obj*)theirs->field)) {
        return false;
    }

    return Vec_Equals(theirs->terms, (Obj*)mine->terms);
}
// Write this query to `outstream`.
//
// The layout is, in order: the boost (via OutStream_Write_F32), the field
// (via Freezer_serialize_string), and the terms (via
// Freezer_serialize_varray).
void
PhraseQuery_Serialize_IMP(PhraseQuery *self, OutStream *outstream) {
    PhraseQueryIVARS *const state = PhraseQuery_IVARS(self);

    OutStream_Write_F32(outstream, state->boost);
    Freezer_serialize_string(state->field, outstream);
    Freezer_serialize_varray(state->terms, outstream);
}
// Destructor: drop this object's references to its terms vector and field
// string, then hand off to the superclass destructor.
void
PhraseQuery_Destroy_IMP(PhraseQuery *self) {
    PhraseQueryIVARS *const state = PhraseQuery_IVARS(self);

    DECREF(state->terms);
    DECREF(state->field);

    // Must come last: the superclass tears down the rest of the object.
    SUPER_DESTROY(self, PHRASEQUERY);
}
// Initialize a PhraseCompiler for `parent` against `searcher`.
//
// Picks a Similarity (the one the schema has for the parent's field, falling
// back to the schema-wide one), runs the superclass initializer, then stores:
//   - idf:        the sum of Sim_IDF() over every term of the phrase
//   - raw_weight: idf * boost (the boost stored in this compiler's ivars)
//
// Returns `self`.
PhraseCompiler*
PhraseCompiler_init(PhraseCompiler *self, PhraseQuery *parent,
                    Searcher *searcher, float boost) {
    PhraseCompilerIVARS *const ivars        = PhraseCompiler_IVARS(self);
    PhraseQueryIVARS    *const parent_ivars = PhraseQuery_IVARS(parent);
    Schema     *schema = Searcher_Get_Schema(searcher);
    Similarity *sim    = Schema_Fetch_Sim(schema, parent_ivars->field);
    Vector     *terms  = parent_ivars->terms;

    // Try harder to find a Similarity if necessary.
    if (!sim) { sim = Schema_Get_Similarity(schema); }

    // Init.
    Compiler_init((Compiler*)self, (Query*)parent, searcher, sim, boost);

    // Store IDF for the phrase.
    ivars->idf = 0;
    const uint32_t num_terms = Vec_Get_Size(terms);
    if (num_terms > 0) {
        // The searcher's doc_max does not depend on the term, so ask for it
        // once rather than once per term.  Guarded by num_terms so that an
        // empty phrase still makes no call at all.
        const int32_t doc_max = Searcher_Doc_Max(searcher);
        for (uint32_t i = 0; i < num_terms; i++) {
            Obj *term = Vec_Fetch(terms, i);
            int32_t doc_freq
                = Searcher_Doc_Freq(searcher, parent_ivars->field, term);
            ivars->idf += Sim_IDF(sim, doc_freq, doc_max);
        }
    }

    // Calculate raw weight.
    ivars->raw_weight = ivars->idf * ivars->boost;

    return self;
}
// Build a Compiler for this query.
//
// A phrase with exactly one term is delegated to a temporary TermQuery
// (carrying this query's boost) and the Compiler that TermQuery produces is
// returned.  Any other term count yields a PhraseCompiler, which is
// normalized here unless `subordinate` is true.
Compiler*
PhraseQuery_Make_Compiler_IMP(PhraseQuery *self, Searcher *searcher,
                              float boost, bool subordinate) {
    PhraseQueryIVARS *const ivars = PhraseQuery_IVARS(self);

    if (Vec_Get_Size(ivars->terms) != 1) {
        // General case: a real phrase compiler.
        PhraseCompiler *phrase_compiler
            = PhraseCompiler_new(self, searcher, boost);
        if (!subordinate) {
            PhraseCompiler_Normalize(phrase_compiler);
        }
        return (Compiler*)phrase_compiler;
    }

    // Optimize for one-term "phrases".
    Obj       *only_term = Vec_Fetch(ivars->terms, 0);
    TermQuery *stand_in  = TermQuery_new(ivars->field, only_term);
    TermQuery_Set_Boost(stand_in, ivars->boost);
    Compiler *delegate
        = (Compiler*)TermQuery_Make_Compiler(stand_in, searcher, boost,
                                             subordinate);
    // The temporary query is no longer needed by this function.
    DECREF(stand_in);
    return delegate;
}
// Produce a dump of this query as a Hash.
//
// Starts from the superclass dump (which must be a Hash) and adds two
// entries: "field" and "terms", each the Freezer_dump() of the matching
// instance variable.
Obj*
PhraseQuery_Dump_IMP(PhraseQuery *self) {
    PhraseQuery_Dump_t parent_dump
        = SUPER_METHOD_PTR(PHRASEQUERY, LUCY_PhraseQuery_Dump);
    Hash *result = (Hash*)CERTIFY(parent_dump(self), HASH);
    PhraseQueryIVARS *state = PhraseQuery_IVARS(self);

    Obj *field_dump = Freezer_dump((Obj*)state->field);
    Hash_Store_Utf8(result, "field", 5, field_dump);

    Obj *terms_dump = Freezer_dump((Obj*)state->terms);
    Hash_Store_Utf8(result, "terms", 5, terms_dump);

    return (Obj*)result;
}
// Shared initializer.
//
// Runs Query_init() with `boost`, verifies with CERTIFY that every element
// of `terms` is an OBJ, and then stores `field` and `terms` in the instance
// variables.  The two pointers are stored as given: this function does not
// clone them or increment their reference counts.
//
// Returns `self`.
static PhraseQuery*
S_do_init(PhraseQuery *self, String *field, Vector *terms, float boost) {
    Query_init((Query*)self, boost);
    PhraseQueryIVARS *const state = PhraseQuery_IVARS(self);

    // Validate all terms before touching the instance variables.
    const uint32_t term_count = Vec_Get_Size(terms);
    uint32_t tick = 0;
    while (tick < term_count) {
        CERTIFY(Vec_Fetch(terms, tick), OBJ);
        tick++;
    }

    state->field = field;
    state->terms = terms;
    return self;
}
// Build a Matcher for one segment.
//
// Returns NULL when any of the following holds:
//   - the parent query has no terms;
//   - the Similarity's posting is missing or is not a SCOREPOSTING;
//   - the segment has no PostingListReader;
//   - any term has no posting list, or a posting list with doc freq 0.
// Otherwise returns a new PhraseMatcher built from one posting list per
// term.  `need_score` is not consulted.
Matcher*
PhraseCompiler_Make_Matcher_IMP(PhraseCompiler *self, SegReader *reader,
                                bool need_score) {
    UNUSED_VAR(need_score);
    PhraseCompilerIVARS *const ivars = PhraseCompiler_IVARS(self);
    PhraseQueryIVARS *const query_ivars
        = PhraseQuery_IVARS((PhraseQuery*)ivars->parent);
    Vector *const terms = query_ivars->terms;
    const uint32_t term_count = Vec_Get_Size(terms);

    // Nothing to match without terms.
    if (term_count == 0) { return NULL; }

    // Bail unless field is valid and posting type supports positions.
    // The type test is evaluated before the probe posting is released.
    Similarity *sim   = PhraseCompiler_Get_Similarity(self);
    Posting    *probe = Sim_Make_Posting(sim);
    const bool positional
        = probe != NULL && Obj_is_a((Obj*)probe, SCOREPOSTING);
    DECREF(probe);
    if (!positional) { return NULL; }

    // Bail if there's no PostingListReader for this segment.
    PostingListReader *const plist_reader
        = (PostingListReader*)SegReader_Fetch(
              reader, Class_Get_Name(POSTINGLISTREADER));
    if (plist_reader == NULL) { return NULL; }

    // Look up each term, stopping at the first one absent from the index.
    Vector *plists    = Vec_new(term_count);
    bool    all_found = true;
    for (uint32_t tick = 0; tick < term_count; tick++) {
        Obj *term = Vec_Fetch(terms, tick);
        PostingList *plist
            = PListReader_Posting_List(plist_reader, query_ivars->field,
                                       term);
        if (plist == NULL || PList_Get_Doc_Freq(plist) == 0) {
            DECREF(plist);
            all_found = false;
            break;
        }
        Vec_Push(plists, (Obj*)plist);
    }

    Matcher *matcher = NULL;
    if (all_found) {
        matcher = (Matcher*)PhraseMatcher_new(sim, plists, (Compiler*)self);
    }

    // Single release point for the local vector on both paths.
    DECREF(plists);
    return matcher;
}
// Rebuild a PhraseQuery from `dump`.
//
// `dump` must be a Hash.  The superclass Load creates the object; this
// method then fills in its `field` (which must load as a STRING) and
// `terms` (which must load as a VECTOR) from the "field" and "terms"
// entries of the hash.  Both entries are required (enforced by CERTIFY).
Obj*
PhraseQuery_Load_IMP(PhraseQuery *self, Obj *dump) {
    Hash *hash = (Hash*)CERTIFY(dump, HASH);
    PhraseQuery_Load_t parent_load
        = SUPER_METHOD_PTR(PHRASEQUERY, LUCY_PhraseQuery_Load);
    PhraseQuery *result = (PhraseQuery*)parent_load(self, dump);
    PhraseQueryIVARS *result_ivars = PhraseQuery_IVARS(result);

    Obj *field_dump = CERTIFY(Hash_Fetch_Utf8(hash, "field", 5), OBJ);
    result_ivars->field = (String*)CERTIFY(Freezer_load(field_dump), STRING);

    Obj *terms_dump = CERTIFY(Hash_Fetch_Utf8(hash, "terms", 5), OBJ);
    result_ivars->terms = (Vector*)CERTIFY(Freezer_load(terms_dump), VECTOR);

    return (Obj*)result;
}
// Render this query as a new String of the form
//
//     field:"term1 term2 ... termN"
//
// where each term is rendered with Obj_To_String() and terms are separated
// by a single space.
String*
PhraseQuery_To_String_IMP(PhraseQuery *self) {
    PhraseQueryIVARS *const ivars = PhraseQuery_IVARS(self);
    const uint32_t term_count = Vec_Get_Size(ivars->terms);
    CharBuf *out = CB_new_from_str(ivars->field);

    CB_Cat_Trusted_Utf8(out, ":\"", 2);
    for (uint32_t tick = 0; tick < term_count; tick++) {
        // Separator goes between terms, i.e. before every term but the
        // first.
        if (tick > 0) {
            CB_Cat_Trusted_Utf8(out, " ", 1);
        }
        String *rendered = Obj_To_String(Vec_Fetch(ivars->terms, tick));
        CB_Cat(out, rendered);
        DECREF(rendered);
    }
    CB_Cat_Trusted_Utf8(out, "\"", 1);

    String *result = CB_Yield_String(out);
    DECREF(out);
    return result;
}
// Compute highlight spans for every occurrence of the phrase in `doc_vec`.
//
// Returns a new Vector of Span objects (possibly empty).  The vector is
// empty when the parent query has no terms, when `field` differs from the
// query's field, or when any term has no TermVector in `doc_vec`.
// `searcher` is not consulted.
//
// Method: a BitVector holds candidate phrase start positions.  It is seeded
// with the positions of the first term; for the i-th term (i > 0) each of
// its positions is shifted back by i and the candidate set is intersected
// with the result.  What survives are the positions at which all terms occur
// consecutively.  Each surviving start position is then mapped to the start
// offset of the first term and the end offset of the last term.
Vector*
PhraseCompiler_Highlight_Spans_IMP(PhraseCompiler *self, Searcher *searcher,
                                   DocVector *doc_vec, String *field) {
    PhraseCompilerIVARS *const ivars = PhraseCompiler_IVARS(self);
    PhraseQueryIVARS *const parent_ivars
        = PhraseQuery_IVARS((PhraseQuery*)ivars->parent);
    Vector *const terms = parent_ivars->terms;
    Vector *const spans = Vec_new(0);
    const uint32_t num_terms = Vec_Get_Size(terms);
    UNUSED_VAR(searcher);

    // Bail if no terms or field doesn't match.
    if (!num_terms) { return spans; }
    if (!Str_Equals(field, (Obj*)parent_ivars->field)) { return spans; }

    Vector    *term_vectors    = Vec_new(num_terms);
    BitVector *posit_vec       = BitVec_new(0);
    BitVector *other_posit_vec = BitVec_new(0);
    for (uint32_t i = 0; i < num_terms; i++) {
        Obj *term = Vec_Fetch(terms, i);
        TermVector *term_vector
            = DocVec_Term_Vector(doc_vec, field, (String*)term);

        // Bail if any term is missing.
        if (!term_vector) {
            break;
        }

        Vec_Push(term_vectors, (Obj*)term_vector);

        I32Array *positions = TV_Get_Positions(term_vector);
        if (i == 0) {
            // Set initial positions from first term.
            for (uint32_t j = I32Arr_Get_Size(positions); j > 0; j--) {
                BitVec_Set(posit_vec, I32Arr_Get(positions, j - 1));
            }
        }
        else {
            // Filter positions using logical "and".
            //
            // Do the shift in signed arithmetic.  Subtracting the unsigned
            // loop index directly would promote the position to unsigned,
            // and the `pos >= 0` test below would then depend on an
            // implementation-defined conversion back to int32_t.
            const int32_t shift = (int32_t)i;

            BitVec_Clear_All(other_posit_vec);
            for (uint32_t j = I32Arr_Get_Size(positions); j > 0; j--) {
                int32_t pos = I32Arr_Get(positions, j - 1) - shift;
                if (pos >= 0) {
                    BitVec_Set(other_posit_vec, pos);
                }
            }
            BitVec_And(posit_vec, other_posit_vec);
        }
    }

    // Proceed only if all terms are present.
    uint32_t num_tvs = Vec_Get_Size(term_vectors);
    if (num_tvs == num_terms) {
        TermVector *first_tv = (TermVector*)Vec_Fetch(term_vectors, 0);
        TermVector *last_tv
            = (TermVector*)Vec_Fetch(term_vectors, num_tvs - 1);
        I32Array *tv_start_positions = TV_Get_Positions(first_tv);
        I32Array *tv_end_positions   = TV_Get_Positions(last_tv);
        I32Array *tv_start_offsets   = TV_Get_Start_Offsets(first_tv);
        I32Array *tv_end_offsets     = TV_Get_End_Offsets(last_tv);
        I32Array *valid_posits       = BitVec_To_Array(posit_vec);
        const uint32_t num_starts    = I32Arr_Get_Size(tv_start_positions);
        const uint32_t num_ends      = I32Arr_Get_Size(tv_end_positions);
        const uint32_t num_valid_posits = I32Arr_Get_Size(valid_posits);
        // Distance from the first term's position to the last term's.
        const int32_t  terms_max     = (int32_t)(num_terms - 1);
        const float    weight        = PhraseCompiler_Get_Weight(self);

        // Cursors into the first/last term's position arrays.  They are
        // deliberately NOT reset per valid position: each search resumes
        // where the previous one stopped.
        uint32_t start_tick = 0;
        uint32_t end_tick   = 0;

        // Add only those starts/ends that belong to a valid position.
        for (uint32_t posit_tick = 0; posit_tick < num_valid_posits;
             posit_tick++) {
            // A surviving start position plus terms_max is a position of
            // the last term (that is how it survived the intersection), so
            // this signed addition stays within int32_t.
            const int32_t valid_start_posit
                = I32Arr_Get(valid_posits, posit_tick);
            const int32_t valid_end_posit = valid_start_posit + terms_max;
            int32_t start_offset = 0;
            int32_t end_offset   = 0;

            for (; start_tick < num_starts; start_tick++) {
                if (I32Arr_Get(tv_start_positions, start_tick)
                    == valid_start_posit) {
                    start_offset = I32Arr_Get(tv_start_offsets, start_tick);
                    break;
                }
            }
            for (; end_tick < num_ends; end_tick++) {
                if (I32Arr_Get(tv_end_positions, end_tick)
                    == valid_end_posit) {
                    end_offset = I32Arr_Get(tv_end_offsets, end_tick);
                    break;
                }
            }

            Vec_Push(spans, (Obj*)Span_new(start_offset,
                                           end_offset - start_offset,
                                           weight));

            // Step past the entries just consumed.
            start_tick++;
            end_tick++;
        }

        DECREF(valid_posits);
    }

    DECREF(other_posit_vec);
    DECREF(posit_vec);
    DECREF(term_vectors);
    return spans;
}
// Accessor for the terms vector.  Returns the stored pointer as-is; no
// reference count is incremented here.
Vector*
PhraseQuery_Get_Terms_IMP(PhraseQuery *self) {
    PhraseQueryIVARS *const state = PhraseQuery_IVARS(self);
    return state->terms;
}
// Accessor for the field name.  Returns the stored pointer as-is; no
// reference count is incremented here.
String*
PhraseQuery_Get_Field_IMP(PhraseQuery *self) {
    PhraseQueryIVARS *const state = PhraseQuery_IVARS(self);
    return state->field;
}