// Check that BitVec_To_Array() returns the bits that were set, in the same
// order as the sorted, de-duplicated source values.
//
// The source values come from TestUtils_random_u64s(NULL, NUM_INTS, 0, 200)
// (presumably NUM_INTS random values drawn from the range 0..200 -- confirm
// against TestUtils).  A single TEST_INT_EQ is reported on `batch`: it passes
// when every element of the resulting array matched its source value.
static void
test_To_Array(TestBatch *batch) {
    enum { NUM_INTS = 20 };
    uint64_t  *source_ints = TestUtils_random_u64s(NULL, NUM_INTS, 0, 200);
    BitVector *bit_vec     = BitVec_new(0);
    I32Array  *array;
    long       num_unique  = 0;
    long       i;

    // Unique the random ints.  After sorting, equal values are adjacent, so
    // keep only the last element of each run, compacting in place
    // (num_unique never exceeds i, so no unread value is overwritten).  The
    // final element has no successor to compare against but always ends a
    // run, so it is kept unconditionally.
    Sort_quicksort(source_ints, NUM_INTS, sizeof(uint64_t), S_compare_u64s,
                   NULL);
    for (i = 0; i < NUM_INTS; i++) {
        if (i == NUM_INTS - 1 || source_ints[i] != source_ints[i + 1]) {
            source_ints[num_unique] = source_ints[i];
            num_unique++;
        }
    }

    // Set bits.
    for (i = 0; i < num_unique; i++) {
        BitVec_Set(bit_vec, (uint32_t)source_ints[i]);
    }

    // Create the array and compare it to the source.  The loop stops at the
    // first mismatch, so `i` only reaches num_unique if every element agreed.
    array = BitVec_To_Array(bit_vec);
    for (i = 0; i < num_unique; i++) {
        if (I32Arr_Get(array, i) != (int32_t)source_ints[i]) {
            break;
        }
    }
    TEST_INT_EQ(batch, i, num_unique, "To_Array (%ld == %ld)", i,
                num_unique);

    DECREF(array);
    DECREF(bit_vec);
    FREEMEM(source_ints);
}
// Check that BitVec_To_Array() returns the bits that were set, in the same
// order as the sorted, de-duplicated source values.
//
// The source values come from TestUtils_random_u64s(NULL, NUM_INTS, 0, 200)
// (presumably NUM_INTS random values drawn from the range 0..200 -- confirm
// against TestUtils).  A single TEST_UINT_EQ is reported on `runner`: it
// passes when every element of the resulting array matched its source value.
static void
test_To_Array(TestBatchRunner *runner) {
    enum { NUM_INTS = 20 };
    uint64_t  *source_ints = TestUtils_random_u64s(NULL, NUM_INTS, 0, 200);
    BitVector *bit_vec     = BitVec_new(0);
    I32Array  *array;
    unsigned   num_unique  = 0;

    // Unique the random ints.  After sorting, equal values are adjacent, so
    // keep only the last element of each run, compacting in place
    // (num_unique never exceeds i, so no unread value is overwritten).  The
    // final element has no successor to compare against but always ends a
    // run, so it is kept unconditionally.
    qsort(source_ints, NUM_INTS, sizeof(uint64_t), S_compare_u64s);
    for (unsigned i = 0; i < NUM_INTS; i++) {
        if (i == NUM_INTS - 1 || source_ints[i] != source_ints[i + 1]) {
            source_ints[num_unique] = source_ints[i];
            num_unique++;
        }
    }

    // Set bits.
    for (unsigned i = 0; i < num_unique; i++) {
        BitVec_Set(bit_vec, (size_t)source_ints[i]);
    }

    // Create the array and compare it to the source.  The loop stops at the
    // first mismatch, so `i` only reaches num_unique if every element agreed;
    // it is declared outside the loop because the check below reads it.
    array = BitVec_To_Array(bit_vec);
    unsigned i;
    for (i = 0; i < num_unique; i++) {
        if (I32Arr_Get(array, (size_t)i) != (int32_t)source_ints[i]) {
            break;
        }
    }
    TEST_UINT_EQ(runner, i, num_unique, "To_Array (%u == %u)", i,
                 num_unique);

    DECREF(array);
    DECREF(bit_vec);
    FREEMEM(source_ints);
}
// Compute highlight spans for the parent PhraseQuery within one field of a
// document.
//
// Returns a newly created VArray holding one Span per position at which the
// phrase's terms occur at consecutive positions in `field`.  The array is
// empty when the query has no terms, when `field` does not equal the query's
// field, or when `doc_vec` yields no term vector for any one of the terms.
// `searcher` is not used.
VArray*
PhraseCompiler_highlight_spans(PhraseCompiler *self, Searcher *searcher,
                               DocVector *doc_vec, const CharBuf *field) {
    PhraseQuery *const phrase_query = (PhraseQuery*)self->parent;
    VArray      *const phrase_terms = phrase_query->terms;
    VArray      *const spans        = VA_new(0);
    const uint32_t     num_terms    = VA_Get_Size(phrase_terms);
    VArray            *term_vectors;
    BitVector         *phrase_starts;   // Candidate phrase start positions.
    BitVector         *shifted;         // Scratch: one term's shifted positions.
    uint32_t           term_tick;
    UNUSED_VAR(searcher);

    // Nothing to highlight without terms or for a different field.
    if (num_terms == 0 || !CB_Equals(field, (Obj*)phrase_query->field)) {
        return spans;
    }

    term_vectors  = VA_new(num_terms);
    phrase_starts = BitVec_new(0);
    shifted       = BitVec_new(0);

    for (term_tick = 0; term_tick < num_terms; term_tick++) {
        CharBuf    *term_text   = (CharBuf*)VA_Fetch(phrase_terms, term_tick);
        TermVector *term_vector = DocVec_Term_Vector(doc_vec, field,
                                                     term_text);
        I32Array   *positions;
        uint32_t    remaining;

        // A missing term means the phrase cannot match; stop collecting so
        // that the size check below fails.
        if (term_vector == NULL) {
            break;
        }
        VA_Push(term_vectors, (Obj*)term_vector);
        positions = TV_Get_Positions(term_vector);

        if (term_tick != 0) {
            // Shift this term's positions back by its index within the
            // phrase, then intersect with the surviving candidates.
            BitVec_Clear_All(shifted);
            remaining = I32Arr_Get_Size(positions);
            while (remaining > 0) {
                int32_t pos = I32Arr_Get(positions, remaining - 1) - term_tick;
                if (pos >= 0) {
                    BitVec_Set(shifted, pos);
                }
                remaining--;
            }
            BitVec_And(phrase_starts, shifted);
        }
        else {
            // The first term seeds the candidate start positions.
            remaining = I32Arr_Get_Size(positions);
            while (remaining > 0) {
                BitVec_Set(phrase_starts, I32Arr_Get(positions, remaining - 1));
                remaining--;
            }
        }
    }

    // Only build spans when a term vector was found for every term.
    if (VA_Get_Size(term_vectors) == num_terms) {
        const uint32_t terms_max   = num_terms - 1;
        TermVector *first_tv       = (TermVector*)VA_Fetch(term_vectors, 0);
        TermVector *last_tv        = (TermVector*)VA_Fetch(term_vectors,
                                                           terms_max);
        I32Array *start_positions  = TV_Get_Positions(first_tv);
        I32Array *end_positions    = TV_Get_Positions(last_tv);
        I32Array *start_offsets    = TV_Get_Start_Offsets(first_tv);
        I32Array *end_offsets      = TV_Get_End_Offsets(last_tv);
        I32Array *valid_posits     = BitVec_To_Array(phrase_starts);
        const uint32_t num_valid   = I32Arr_Get_Size(valid_posits);
        const float    weight      = PhraseCompiler_Get_Weight(self);
        uint32_t start_tick        = 0;
        uint32_t end_tick          = 0;
        uint32_t posit_tick;

        // The two scan cursors are never rewound: each valid position resumes
        // searching where the previous one stopped.  (Presumably all three
        // position arrays are in ascending order, which is what makes a
        // single forward pass sufficient -- confirm.)
        for (posit_tick = 0; posit_tick < num_valid; posit_tick++) {
            const int32_t valid_start = I32Arr_Get(valid_posits, posit_tick);
            const int32_t valid_end   = valid_start + terms_max;
            int32_t  start_offset     = 0;
            int32_t  end_offset       = 0;
            uint32_t limit;

            // Locate the first term's occurrence at the phrase start.
            limit = I32Arr_Get_Size(start_positions);
            while (start_tick < limit) {
                if (I32Arr_Get(start_positions, start_tick) == valid_start) {
                    start_offset = I32Arr_Get(start_offsets, start_tick);
                    break;
                }
                start_tick++;
            }

            // Locate the last term's occurrence at the phrase end.
            limit = I32Arr_Get_Size(end_positions);
            while (end_tick < limit) {
                if (I32Arr_Get(end_positions, end_tick) == valid_end) {
                    end_offset = I32Arr_Get(end_offsets, end_tick);
                    break;
                }
                end_tick++;
            }

            // Offsets stay 0 if a lookup found nothing.
            VA_Push(spans,
                    (Obj*)Span_new(start_offset, end_offset - start_offset,
                                   weight));

            // Step past the entries just consumed.
            start_tick++;
            end_tick++;
        }

        DECREF(valid_posits);
    }

    DECREF(shifted);
    DECREF(phrase_starts);
    DECREF(term_vectors);
    return spans;
}