// Exercise BitVec_And() in both directions between vectors built by
// S_create_set(1) and S_create_set(2), checking each result with
// S_verify_logical_op().
static void
test_And(TestBatch *batch) {
    // The first two vectors are modified in place by BitVec_And(); the last
    // two are left untouched and serve as the operands handed to the
    // verifier.  Judging by the test labels, set 1 is the "smaller" one.
    BitVector *narrow_vec  = S_create_set(1);
    BitVector *wide_vec    = S_create_set(2);
    BitVector *operand_one = S_create_set(1);
    BitVector *operand_two = S_create_set(2);

    // Receiver built from set 1, argument built from set 2.
    BitVec_And(narrow_vec, operand_two);
    TEST_INT_EQ(batch,
                S_verify_logical_op(narrow_vec, operand_one, operand_two,
                                    OP_AND),
                50, "AND with self smaller than other");

    // Receiver built from set 2, argument built from set 1.
    BitVec_And(wide_vec, operand_one);
    TEST_INT_EQ(batch,
                S_verify_logical_op(wide_vec, operand_one, operand_two,
                                    OP_AND),
                50, "AND with other smaller than self");

    DECREF(narrow_vec);
    DECREF(wide_vec);
    DECREF(operand_one);
    DECREF(operand_two);
}
// Build the highlight spans for a phrase within one field of a document.
//
// self     - compiler whose parent PhraseQuery supplies the terms and the
//            field name.
// searcher - not used.
// doc_vec  - queried for the TermVector of each phrase term.
// field    - field being highlighted; compared against the query's field.
//
// Returns a VArray allocated here.  It is empty when the query has no terms,
// when `field` does not equal the query's field, or when doc_vec supplies no
// TermVector for at least one term.  Otherwise it receives one Span per entry
// of the array produced from the surviving phrase start positions; each Span
// is created from a start offset, a length (end offset minus start offset)
// and the value of PhraseCompiler_Get_Weight(self).
VArray*
PhraseCompiler_highlight_spans(PhraseCompiler *self, Searcher *searcher,
                               DocVector *doc_vec, const CharBuf *field) {
    PhraseQuery *const phrase_query = (PhraseQuery*)self->parent;
    VArray *const phrase_terms = phrase_query->terms;
    VArray *const spans = VA_new(0);
    const uint32_t num_terms = VA_Get_Size(phrase_terms);
    VArray *term_vectors;
    BitVector *phrase_starts;   // Candidate positions of the first term.
    BitVector *shifted;         // Scratch: later term's positions, shifted.
    uint32_t term_tick;
    UNUSED_VAR(searcher);

    // Nothing to highlight without terms or for a different field.
    if (!num_terms || !CB_Equals(field, (Obj*)phrase_query->field)) {
        return spans;
    }

    term_vectors  = VA_new(num_terms);
    phrase_starts = BitVec_new(0);
    shifted       = BitVec_new(0);

    for (term_tick = 0; term_tick < num_terms; term_tick++) {
        Obj *term = VA_Fetch(phrase_terms, term_tick);
        TermVector *tv
            = DocVec_Term_Vector(doc_vec, field, (CharBuf*)term);
        I32Array *positions;
        uint32_t remaining;

        // A missing term vector ends the scan early; the size check after
        // the loop then skips span generation.
        if (!tv) {
            break;
        }
        VA_Push(term_vectors, (Obj*)tv);
        positions = TV_Get_Positions(tv);

        if (term_tick > 0) {
            // Later terms: shift each position back by the term's index
            // within the phrase so that it lines up with the first term,
            // then keep only candidates present in both vectors.
            BitVec_Clear_All(shifted);
            remaining = I32Arr_Get_Size(positions);
            while (remaining > 0) {
                const int32_t aligned
                    = I32Arr_Get(positions, remaining - 1) - term_tick;
                if (aligned >= 0) {
                    BitVec_Set(shifted, aligned);
                }
                remaining--;
            }
            BitVec_And(phrase_starts, shifted);
        }
        else {
            // First term: every one of its positions is a candidate start.
            // The array is walked from its last element to its first.
            remaining = I32Arr_Get_Size(positions);
            while (remaining > 0) {
                BitVec_Set(phrase_starts,
                           I32Arr_Get(positions, remaining - 1));
                remaining--;
            }
        }
    }

    // Generate spans only if a term vector was collected for every term.
    if (VA_Get_Size(term_vectors) == num_terms) {
        TermVector *head_tv = (TermVector*)VA_Fetch(term_vectors, 0);
        TermVector *tail_tv
            = (TermVector*)VA_Fetch(term_vectors, num_terms - 1);
        I32Array *head_positions = TV_Get_Positions(head_tv);
        I32Array *tail_positions = TV_Get_Positions(tail_tv);
        I32Array *head_starts    = TV_Get_Start_Offsets(head_tv);
        I32Array *tail_ends      = TV_Get_End_Offsets(tail_tv);
        const uint32_t last_term_delta = num_terms - 1;
        I32Array *matches = BitVec_To_Array(phrase_starts);
        const uint32_t num_matches = I32Arr_Get_Size(matches);
        const float weight = PhraseCompiler_Get_Weight(self);
        // Cursors into the first and last term's position arrays.  They are
        // never rewound: each search resumes just past the previous hit.
        // NOTE(review): this presumably relies on both the position arrays
        // and `matches` being in ascending order -- confirm.
        uint32_t head_tick = 0;
        uint32_t tail_tick = 0;
        uint32_t match_tick;

        for (match_tick = 0; match_tick < num_matches; match_tick++) {
            const int32_t first_posit = I32Arr_Get(matches, match_tick);
            const int32_t last_posit  = first_posit + last_term_delta;
            // Offsets stay 0 if the corresponding search finds nothing.
            int32_t start_offset = 0;
            int32_t end_offset   = 0;
            uint32_t limit;

            // Start offset: taken from the first term's entry at the
            // phrase's starting position.
            limit = I32Arr_Get_Size(head_positions);
            while (head_tick < limit) {
                if (I32Arr_Get(head_positions, head_tick) == first_posit) {
                    start_offset = I32Arr_Get(head_starts, head_tick);
                    break;
                }
                head_tick++;
            }

            // End offset: taken from the last term's entry at the phrase's
            // final position.
            limit = I32Arr_Get_Size(tail_positions);
            while (tail_tick < limit) {
                if (I32Arr_Get(tail_positions, tail_tick) == last_posit) {
                    end_offset = I32Arr_Get(tail_ends, tail_tick);
                    break;
                }
                tail_tick++;
            }

            VA_Push(spans,
                    (Obj*)Span_new(start_offset, end_offset - start_offset,
                                   weight));

            // Step past the entries just examined.
            head_tick++;
            tail_tick++;
        }

        DECREF(matches);
    }

    DECREF(shifted);
    DECREF(phrase_starts);
    DECREF(term_vectors);

    return spans;
}