/* Exercise BitVec_Set/BitVec_Get: set one bit below the initial capacity
 * and one above it, confirm the vector grew, then verify every bit up to
 * the capacity plus one probe just past it. */
static void
test_Set_and_Get(TestBatch *batch) {
    const uint32_t low_bit  = 3;
    const uint32_t high_bit = 17;
    BitVector *bit_vec = BitVec_new(8);
    unsigned tick;
    unsigned cap;

    BitVec_Set(bit_vec, low_bit);
    TEST_TRUE(batch, BitVec_Get_Capacity(bit_vec) < high_bit,
              "set below cap");

    BitVec_Set(bit_vec, high_bit);
    TEST_TRUE(batch, BitVec_Get_Capacity(bit_vec) > high_bit,
              "set above cap causes BitVector to grow");

    // Only the two bits set above may read back as true.
    cap  = BitVec_Get_Capacity(bit_vec);
    tick = 0;
    while (tick < cap) {
        if (tick != low_bit && tick != high_bit) {
            TEST_FALSE(batch, BitVec_Get(bit_vec, tick), "get %d", tick);
        }
        else {
            TEST_TRUE(batch, BitVec_Get(bit_vec, tick), "set/get %d", tick);
        }
        tick++;
    }

    // tick now equals the capacity, i.e. one past the last valid bit.
    TEST_FALSE(batch, BitVec_Get(bit_vec, tick), "out of range get");

    DECREF(bit_vec);
}
/* Exercise BitVec_Set/BitVec_Get: set one bit below the initial capacity
 * and one above it, confirm the vector grew, then verify every bit up to
 * the capacity plus one probe just past it. */
static void
test_Set_and_Get(TestBatchRunner *runner) {
    const size_t low_bit  = 3;
    const size_t high_bit = 17;
    BitVector *bit_vec = BitVec_new(8);

    BitVec_Set(bit_vec, low_bit);
    TEST_TRUE(runner, BitVec_Get_Capacity(bit_vec) < high_bit,
              "set below cap");

    BitVec_Set(bit_vec, high_bit);
    TEST_TRUE(runner, BitVec_Get_Capacity(bit_vec) > high_bit,
              "set above cap causes BitVector to grow");

    // Only the two bits set above may read back as true.
    const size_t cap = BitVec_Get_Capacity(bit_vec);
    size_t tick = 0;
    while (tick < cap) {
        if (tick != low_bit && tick != high_bit) {
            TEST_FALSE(runner, BitVec_Get(bit_vec, tick), "get %u",
                       (unsigned)tick);
        }
        else {
            TEST_TRUE(runner, BitVec_Get(bit_vec, tick), "set/get %u",
                      (unsigned)tick);
        }
        tick++;
    }

    // tick now equals the capacity, i.e. one past the last valid bit.
    TEST_FALSE(runner, BitVec_Get(bit_vec, tick), "out of range get");

    DECREF(bit_vec);
}
/* For every pair of bit numbers in 0..17, set one bit in each of two fresh
 * vectors, have the first Mimic the second, and check that no bit other
 * than the second vector's bit is set in the first within 0..17. */
static void
test_Mimic(TestBatch *batch) {
    int dst_bit;
    int src_bit;

    for (dst_bit = 0; dst_bit <= 17; dst_bit++) {
        for (src_bit = 0; src_bit <= 17; src_bit++) {
            BitVector *dst_vec = BitVec_new(0);
            BitVector *src_vec = BitVec_new(0);
            int scan = 0;

            BitVec_Set(dst_vec, dst_bit);
            BitVec_Set(src_vec, src_bit);
            BitVec_Mimic(dst_vec, (Obj*)src_vec);

            // Advance while each bit is either clear or the expected one;
            // reaching 18 means no stray bit was found.
            while (scan <= 17
                   && (!BitVec_Get(dst_vec, scan) || scan == src_bit)) {
                scan++;
            }
            TEST_INT_EQ(batch, scan, 18, "Mimic(%d, %d)", dst_bit, src_bit);

            DECREF(dst_vec);
            DECREF(src_vec);
        }
    }
}
/* Check that BitVec_To_Array reports the bits that were set, in ascending
 * order: generate 20 random integers (presumably bounded by the 0/200
 * arguments -- confirm against TestUtils_random_u64s), sort and unique
 * them, set the matching bits, and compare the resulting array against
 * the uniqued source element by element. */
static void
test_To_Array(TestBatch *batch) {
    const long num_ints    = 20;
    uint64_t  *source_ints = TestUtils_random_u64s(NULL, 20, 0, 200);
    BitVector *bit_vec     = BitVec_new(0);
    I32Array  *array;
    long       num_unique  = 0;
    long       i;

    // Unique the random ints in place.  An element is kept when it is the
    // last of its run of equal values; the final element always closes a
    // run, so it must be kept explicitly (stopping the loop one short
    // would silently drop the largest value from the test).
    Sort_quicksort(source_ints, 20, sizeof(uint64_t), S_compare_u64s, NULL);
    for (i = 0; i < num_ints; i++) {
        if (i == num_ints - 1 || source_ints[i] != source_ints[i + 1]) {
            source_ints[num_unique] = source_ints[i];
            num_unique++;
        }
    }

    // Set bits.
    for (i = 0; i < num_unique; i++) {
        BitVec_Set(bit_vec, (uint32_t)source_ints[i]);
    }

    // Create the array and compare it to the source; stop at the first
    // mismatch so that `i` records how many entries agreed.
    array = BitVec_To_Array(bit_vec);
    for (i = 0; i < num_unique; i++) {
        if (I32Arr_Get(array, i) != (int32_t)source_ints[i]) {
            break;
        }
    }
    TEST_INT_EQ(batch, i, num_unique, "To_Array (%ld == %ld)", i,
                num_unique);

    DECREF(array);
    DECREF(bit_vec);
    FREEMEM(source_ints);
}
/* Check that BitVec_To_Array reports the bits that were set, in ascending
 * order: generate 20 random integers (presumably bounded by the 0/200
 * arguments -- confirm against TestUtils_random_u64s), sort and unique
 * them, set the matching bits, and compare the resulting array against
 * the uniqued source element by element. */
static void
test_To_Array(TestBatchRunner *runner) {
    const unsigned num_ints    = 20;
    uint64_t      *source_ints = TestUtils_random_u64s(NULL, 20, 0, 200);
    BitVector     *bit_vec     = BitVec_new(0);
    I32Array      *array;
    unsigned       num_unique  = 0;

    // Unique the random ints in place.  An element is kept when it is the
    // last of its run of equal values; the final element always closes a
    // run, so it must be kept explicitly (stopping the loop one short
    // would silently drop the largest value from the test).
    qsort(source_ints, 20, sizeof(uint64_t), S_compare_u64s);
    for (unsigned i = 0; i < num_ints; i++) {
        if (i == num_ints - 1 || source_ints[i] != source_ints[i + 1]) {
            source_ints[num_unique] = source_ints[i];
            num_unique++;
        }
    }

    // Set bits.
    for (unsigned i = 0; i < num_unique; i++) {
        BitVec_Set(bit_vec, (size_t)source_ints[i]);
    }

    // Create the array and compare it to the source; stop at the first
    // mismatch so that `i` records how many entries agreed.
    array = BitVec_To_Array(bit_vec);
    unsigned i;
    for (i = 0; i < num_unique; i++) {
        if (I32Arr_Get(array, (size_t)i) != (int32_t)source_ints[i]) {
            break;
        }
    }
    TEST_UINT_EQ(runner, i, num_unique, "To_Array (%u == %u)", i,
                 num_unique);

    DECREF(array);
    DECREF(bit_vec);
    FREEMEM(source_ints);
}
/* Mark every document that has a posting for `term` in `field` as deleted.
 *
 * Each segment reader is visited in turn.  Segments with no
 * PostingListReader, or for which no posting list is returned, are
 * skipped.  A segment's `updated` flag is raised only when at least one
 * bit that was previously clear gets set. */
void
DefDelWriter_Delete_By_Term_IMP(DefaultDeletionsWriter *self, String *field,
                                Obj *term) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    const uint32_t num_segs = VA_Get_Size(ivars->seg_readers);

    for (uint32_t seg_tick = 0; seg_tick < num_segs; seg_tick++) {
        SegReader *seg_reader
            = (SegReader*)VA_Fetch(ivars->seg_readers, seg_tick);
        PostingListReader *plist_reader
            = (PostingListReader*)SegReader_Fetch(
                  seg_reader, Class_Get_Name(POSTINGLISTREADER));
        BitVector *bit_vec
            = (BitVector*)VA_Fetch(ivars->bit_vecs, seg_tick);
        PostingList *plist = NULL;

        if (plist_reader) {
            plist = PListReader_Posting_List(plist_reader, field, term);
        }
        if (!plist) {
            continue;
        }

        // Walk the postings until the iterator yields 0, flagging each doc.
        bool newly_deleted = false;
        for (;;) {
            const int32_t doc_id = PList_Next(plist);
            if (doc_id == 0) {
                break;
            }
            if (!BitVec_Get(bit_vec, doc_id)) {
                newly_deleted = true;
            }
            BitVec_Set(bit_vec, doc_id);
        }

        if (newly_deleted) {
            ivars->updated[seg_tick] = true;
        }
        DECREF(plist);
    }
}
/* Mark every document matched by `query` as deleted.
 *
 * A single Compiler is built from the query and the writer's searcher,
 * then one Matcher is requested per segment.  Segments for which no
 * Matcher is produced are skipped.  A segment's `updated` flag is raised
 * only when at least one bit that was previously clear gets set. */
void
DefDelWriter_Delete_By_Query_IMP(DefaultDeletionsWriter *self, Query *query) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Compiler *compiler
        = Query_Make_Compiler(query, (Searcher*)ivars->searcher,
                              Query_Get_Boost(query), false);
    const uint32_t num_segs = VA_Get_Size(ivars->seg_readers);

    for (uint32_t seg_tick = 0; seg_tick < num_segs; seg_tick++) {
        SegReader *seg_reader
            = (SegReader*)VA_Fetch(ivars->seg_readers, seg_tick);
        BitVector *bit_vec
            = (BitVector*)VA_Fetch(ivars->bit_vecs, seg_tick);
        Matcher *matcher = Compiler_Make_Matcher(compiler, seg_reader, false);

        if (!matcher) {
            continue;
        }

        // Walk the matches until the matcher yields 0, flagging each doc.
        bool newly_deleted = false;
        for (;;) {
            const int32_t doc_id = Matcher_Next(matcher);
            if (doc_id == 0) {
                break;
            }
            if (!BitVec_Get(bit_vec, doc_id)) {
                newly_deleted = true;
            }
            BitVec_Set(bit_vec, doc_id);
        }

        if (newly_deleted) {
            ivars->updated[seg_tick] = true;
        }
        DECREF(matcher);
    }

    DECREF(compiler);
}
/* Build a SeriesMatcher from a list of doc ids and a list of per-segment
 * offsets.
 *
 * One BitVector (wrapped in a BitVecMatcher) is created per entry in
 * `offsets`.  Doc ids are consumed in order from `doc_ids`; each is stored
 * in the current segment's vector relative to that segment's offset until
 * a doc id greater than the segment's upper bound is seen.  The upper
 * bound is the next offset, or `doc_max + 1` for the final segment. */
static SeriesMatcher*
S_make_series_matcher(I32Array *doc_ids, I32Array *offsets, int32_t doc_max) {
    const int32_t doc_id_count = I32Arr_Get_Size(doc_ids);
    const int32_t seg_count    = I32Arr_Get_Size(offsets);
    VArray  *matchers = VA_new(seg_count);
    int32_t  cursor   = 0;  // Next unconsumed slot in doc_ids.

    for (int32_t seg = 0; seg < seg_count; seg++) {
        const int32_t seg_start = I32Arr_Get(offsets, seg);
        int32_t       seg_limit;

        if (seg == seg_count - 1) {
            seg_limit = doc_max + 1;
        }
        else {
            seg_limit = I32Arr_Get(offsets, seg + 1);
        }

        BitVector *bit_vec = BitVec_new(seg_limit - seg_start);

        // Take doc ids for this segment; leave the rest for later ones.
        while (cursor < doc_id_count) {
            const int32_t doc_id = I32Arr_Get(doc_ids, cursor);
            if (doc_id > seg_limit) {
                break;
            }
            cursor++;
            BitVec_Set(bit_vec, doc_id - seg_start);
        }

        VA_Push(matchers, (Obj*)BitVecMatcher_new(bit_vec));
        DECREF(bit_vec);
    }

    SeriesMatcher *result = SeriesMatcher_new(matchers, offsets);
    DECREF(matchers);
    return result;
}
// Valgrind only - detect off-by-one error. static void test_off_by_one_error() { for (unsigned cap = 5; cap <= 24; cap++) { BitVector *bit_vec = BitVec_new(cap); BitVec_Set(bit_vec, cap - 2); DECREF(bit_vec); } }
/* Set four bits in a BitVector, clone it, and check that the clone agrees
 * with the original on bits 0..49 and reports a count of four. */
static void
test_Clone(TestBatchRunner *runner) {
    BitVector *self = BitVec_new(30);

    BitVec_Set(self, 2);
    BitVec_Set(self, 3);
    BitVec_Set(self, 10);
    BitVec_Set(self, 20);

    BitVector *twin = BitVec_Clone(self);

    // Count leading positions on which both vectors agree; 50 means all.
    size_t agreed = 0;
    while (agreed < 50
           && BitVec_Get(self, agreed) == BitVec_Get(twin, agreed)) {
        agreed++;
    }
    TEST_UINT_EQ(runner, agreed, 50, "Clone");
    TEST_UINT_EQ(runner, BitVec_Count(twin), 4, "clone Count");

    DECREF(self);
    DECREF(twin);
}
/* Set four bits in a BitVector, clone it, and check that the clone agrees
 * with the original on bits 0..49 and reports a count of four. */
static void
test_Clone(TestBatch *batch) {
    BitVector *self = BitVec_new(30);
    BitVector *twin;
    int agreed = 0;

    BitVec_Set(self, 2);
    BitVec_Set(self, 3);
    BitVec_Set(self, 10);
    BitVec_Set(self, 20);

    twin = BitVec_Clone(self);

    // Count leading positions on which both vectors agree; 50 means all.
    while (agreed < 50
           && BitVec_Get(self, agreed) == BitVec_Get(twin, agreed)) {
        agreed++;
    }
    TEST_INT_EQ(batch, agreed, 50, "Clone");
    TEST_INT_EQ(batch, BitVec_Count(twin), 4, "clone Count");

    DECREF(self);
    DECREF(twin);
}
/* Return a new BitVector populated from one of two fixed lists of bit
 * numbers: the first list when `set_num` is 1, the second list for any
 * other value.  Each list is terminated by a 0 sentinel, so bit 0 is
 * never set by this helper.  The caller receives the new vector. */
static BitVector*
S_create_set(int set_num) {
    unsigned first_set[]  = { 1, 2, 3, 10, 20, 30, 0 };
    unsigned second_set[] = { 2,  3,  4,  5,  6,  7,  8,  9, 10,
                              25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
                              0 };
    unsigned *cursor;
    BitVector *bit_vec = BitVec_new(31);

    if (set_num == 1) {
        cursor = first_set;
    }
    else {
        cursor = second_set;
    }

    // Walk the chosen list up to its 0 terminator.
    while (*cursor != 0) {
        BitVec_Set(bit_vec, *cursor);
        cursor++;
    }

    return bit_vec;
}
/* For every pair of bit numbers in 0..17, set one bit in each of two fresh
 * vectors, have the first Mimic the second, and check that no bit other
 * than the second vector's bit is set in the first within 0..17. */
static void
test_Mimic(TestBatchRunner *runner) {
    for (unsigned dst_bit = 0; dst_bit <= 17; dst_bit++) {
        for (unsigned src_bit = 0; src_bit <= 17; src_bit++) {
            BitVector *dst_vec = BitVec_new(0);
            BitVector *src_vec = BitVec_new(0);

            BitVec_Set(dst_vec, dst_bit);
            BitVec_Set(src_vec, src_bit);
            BitVec_Mimic(dst_vec, (Obj*)src_vec);

            // Advance while each bit is either clear or the expected one;
            // reaching 18 means no stray bit was found.
            unsigned scan = 0;
            while (scan <= 17
                   && (!BitVec_Get(dst_vec, scan) || scan == src_bit)) {
                scan++;
            }
            TEST_UINT_EQ(runner, scan, 18, "Mimic(%u, %u)", dst_bit,
                         src_bit);

            DECREF(dst_vec);
            DECREF(src_vec);
        }
    }
}
/* Mark a single document as deleted.
 *
 * The segment holding `doc_id` is located via PolyReader_sub_tick, the id
 * is made segment-relative by subtracting that segment's start offset,
 * and the corresponding bit is set.  Nothing is changed if the bit was
 * already set; otherwise the segment's `updated` flag is raised too. */
void
DefDelWriter_Delete_By_Doc_ID_IMP(DefaultDeletionsWriter *self,
                                  int32_t doc_id) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    const uint32_t seg_tick  = PolyReader_sub_tick(ivars->seg_starts, doc_id);
    BitVector     *seg_dels  = (BitVector*)VA_Fetch(ivars->bit_vecs, seg_tick);
    const uint32_t seg_start = I32Arr_Get(ivars->seg_starts, seg_tick);
    const int32_t  local_id  = doc_id - seg_start;

    if (BitVec_Get(seg_dels, local_id)) {
        // Already deleted.
        return;
    }

    ivars->updated[seg_tick] = true;
    BitVec_Set(seg_dels, local_id);
}
/* Exercise BitVec_Next_Hit around a single set bit.
 *
 * For each bit number from 24 through 33, a fresh 64-bit vector gets
 * exactly that bit set.  Probes starting at 0, at 1, and at every tick
 * from 15 up to the bit itself must report the bit; probes starting at
 * any of the nine ticks after it must report -1. */
static void
test_Next_Hit(TestBatchRunner *runner) {
    for (int i = 24; i <= 33; i++) {
        BitVector *bit_vec = BitVec_new(64);
        BitVec_Set(bit_vec, (size_t)i);

        TEST_INT_EQ(runner, BitVec_Next_Hit(bit_vec, 0), i,
                    "Next_Hit for 0 is %d", i);
        // Probe tick 1, as the test label says; probing 0 again would
        // merely repeat the previous assertion.
        TEST_INT_EQ(runner, BitVec_Next_Hit(bit_vec, 1), i,
                    "Next_Hit for 1 is %d", i);

        // Any starting tick at or before the set bit finds it.
        for (int probe = 15; probe <= i; probe++) {
            TEST_INT_EQ(runner, BitVec_Next_Hit(bit_vec, (size_t)probe), i,
                        "Next_Hit for %d is %d", probe, i);
        }

        // Any starting tick past the set bit finds nothing.
        for (int probe = i + 1; probe <= i + 9; probe++) {
            TEST_INT_EQ(runner, BitVec_Next_Hit(bit_vec, (size_t)probe), -1,
                        "no Next_Hit for %d when max is %d", probe, i);
        }

        DECREF(bit_vec);
    }
}
/* Initialize a DefaultDeletionsWriter.
 *
 * After the DataWriter base initializer runs, the writer records the
 * PolyReader's segment readers and offsets, allocates a zeroed `updated`
 * flag per segment, creates an IndexSearcher over the PolyReader, and
 * builds one BitVector per segment that is pre-populated from that
 * segment's DeletionsReader (when the segment has one and it yields an
 * iterator).  Each segment's name is also mapped to its index in
 * `name_to_tick`.  Returns `self`. */
DefaultDeletionsWriter*
DefDelWriter_init(DefaultDeletionsWriter *self, Schema *schema,
                  Snapshot *snapshot, Segment *segment,
                  PolyReader *polyreader) {
    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);

    ivars->seg_readers = PolyReader_Seg_Readers(polyreader);
    const uint32_t seg_count = VA_Get_Size(ivars->seg_readers);

    ivars->seg_starts   = PolyReader_Offsets(polyreader);
    ivars->bit_vecs     = VA_new(seg_count);
    ivars->updated      = (bool*)CALLOCATE(seg_count, sizeof(bool));
    ivars->searcher     = IxSearcher_new((Obj*)polyreader);
    ivars->name_to_tick = Hash_new(seg_count);

    // Materialize a BitVector of deletions for each segment.
    uint32_t tick = 0;
    while (tick < seg_count) {
        SegReader *seg_reader
            = (SegReader*)VA_Fetch(ivars->seg_readers, tick);
        BitVector *bit_vec = BitVec_new(SegReader_Doc_Max(seg_reader));
        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Fetch(
                  seg_reader, Class_Get_Name(DELETIONSREADER));
        Matcher *seg_dels = NULL;

        if (del_reader) {
            seg_dels = DelReader_Iterator(del_reader);
        }

        if (seg_dels) {
            // Copy existing deletions; the iterator yields 0 when done.
            for (int32_t del = Matcher_Next(seg_dels);
                 del != 0;
                 del = Matcher_Next(seg_dels)
                ) {
                BitVec_Set(bit_vec, del);
            }
            DECREF(seg_dels);
        }

        VA_Store(ivars->bit_vecs, tick, (Obj*)bit_vec);
        Hash_Store(ivars->name_to_tick,
                   (Obj*)SegReader_Get_Seg_Name(seg_reader),
                   (Obj*)Int32_new(tick));
        tick++;
    }

    return self;
}
/* Set bits 0..63 one at a time in a shuffled order and check after each
 * Set that BitVec_Count equals the number of bits set so far. */
static void
test_Count(TestBatch *batch) {
    int order[64];
    int num_set;
    int slot;
    BitVector *bit_vec = BitVec_new(64);

    // Start from the identity ordering...
    slot = 0;
    while (slot < 64) {
        order[slot] = slot;
        slot++;
    }

    // ...then swap every slot with a randomly chosen one.
    slot = 0;
    while (slot < 64) {
        int other = rand() % 64;
        int held  = order[slot];
        order[slot]  = order[other];
        order[other] = held;
        slot++;
    }

    // Stop at the first Set after which the count is wrong, so that
    // num_set reaches 64 only if every intermediate count was right.
    num_set = 0;
    while (num_set < 64) {
        BitVec_Set(bit_vec, order[num_set]);
        if (BitVec_Count(bit_vec) != (uint32_t)(num_set + 1)) {
            break;
        }
        num_set++;
    }
    TEST_INT_EQ(batch, num_set, 64,
                "Count() returns the right number of bits");

    DECREF(bit_vec);
}
/* Set bits 0..63 one at a time in a shuffled order and check after each
 * Set that BitVec_Count equals the number of bits set so far. */
static void
test_Count(TestBatchRunner *runner) {
    unsigned order[64];
    BitVector *bit_vec = BitVec_new(64);
    unsigned slot;

    // Start from the identity ordering...
    slot = 0;
    while (slot < 64) {
        order[slot] = slot;
        slot++;
    }

    // ...then swap every slot with a randomly chosen one.
    slot = 0;
    while (slot < 64) {
        unsigned other = (unsigned)rand() % 64;
        unsigned held  = order[slot];
        order[slot]  = order[other];
        order[other] = held;
        slot++;
    }

    // Stop at the first Set after which the count is wrong, so that
    // num_set reaches 64 only if every intermediate count was right.
    unsigned num_set = 0;
    while (num_set < 64) {
        BitVec_Set(bit_vec, order[num_set]);
        if (BitVec_Count(bit_vec) != (uint32_t)(num_set + 1)) {
            break;
        }
        num_set++;
    }
    TEST_UINT_EQ(runner, num_set, 64,
                 "Count() returns the right number of bits");

    DECREF(bit_vec);
}
/* Exercise BitVec_Next_Hit around a single set bit.
 *
 * For each bit number from 24 through 33, a fresh 64-bit vector gets
 * exactly that bit set.  Probes starting at 0, at 1, and at every tick
 * from 15 up to the bit itself must report the bit; probes starting at
 * any of the nine ticks after it must report -1. */
static void
test_Next_Hit(TestBatch *batch) {
    int i;

    for (i = 24; i <= 33; i++) {
        int probe;
        BitVector *bit_vec = BitVec_new(64);
        BitVec_Set(bit_vec, i);

        TEST_INT_EQ(batch, BitVec_Next_Hit(bit_vec, 0), i,
                    "Next_Hit for 0 is %d", i);
        // Probe tick 1, as the test label says; probing 0 again would
        // merely repeat the previous assertion.
        TEST_INT_EQ(batch, BitVec_Next_Hit(bit_vec, 1), i,
                    "Next_Hit for 1 is %d", i);

        // Any starting tick at or before the set bit finds it.
        for (probe = 15; probe <= i; probe++) {
            TEST_INT_EQ(batch, BitVec_Next_Hit(bit_vec, probe), i,
                        "Next_Hit for %d is %d", probe, i);
        }

        // Any starting tick past the set bit finds nothing.
        for (probe = i + 1; probe <= i + 9; probe++) {
            TEST_INT_EQ(batch, BitVec_Next_Hit(bit_vec, probe), -1,
                        "no Next_Hit for %d when max is %d", probe, i);
        }

        DECREF(bit_vec);
    }
}
/* Produce highlight spans for phrase matches within one field of a doc.
 *
 * Returns a new VArray of Span objects.  The array is empty when the
 * phrase has no terms, when `field` differs from the query's field, or
 * when any term lacks a term vector in `doc_vec`.  `searcher` is unused.
 *
 * Approach: a bit vector of candidate phrase start positions is seeded
 * from the first term's positions.  For each later term (index k), its
 * positions shifted down by k are ANDed in, so a bit survives only if
 * every term occurs at the right distance from it.  Each surviving start
 * position is then paired with the first term's start offset and the
 * last term's end offset to form a Span. */
VArray*
PhraseCompiler_highlight_spans(PhraseCompiler *self, Searcher *searcher,
                               DocVector *doc_vec, const CharBuf *field) {
    PhraseQuery *const parent    = (PhraseQuery*)self->parent;
    VArray      *const terms     = parent->terms;
    VArray      *const spans     = VA_new(0);
    const uint32_t     num_terms = VA_Get_Size(terms);
    UNUSED_VAR(searcher);

    // Bail if no terms or field doesn't match.
    if (num_terms == 0) {
        return spans;
    }
    if (!CB_Equals(field, (Obj*)parent->field)) {
        return spans;
    }

    VArray    *term_vectors = VA_new(num_terms);
    BitVector *starts_vec   = BitVec_new(0);
    BitVector *scratch_vec  = BitVec_new(0);

    for (uint32_t term_tick = 0; term_tick < num_terms; term_tick++) {
        Obj *term = VA_Fetch(terms, term_tick);
        TermVector *term_vector
            = DocVec_Term_Vector(doc_vec, field, (CharBuf*)term);

        // Bail if any term is missing.
        if (!term_vector) {
            break;
        }
        VA_Push(term_vectors, (Obj*)term_vector);

        I32Array *positions = TV_Get_Positions(term_vector);
        uint32_t  remaining = I32Arr_Get_Size(positions);

        if (term_tick == 0) {
            // Seed candidate starts from the first term, last to first.
            while (remaining > 0) {
                remaining--;
                BitVec_Set(starts_vec, I32Arr_Get(positions, remaining));
            }
        }
        else {
            // Shift this term's positions back by its index within the
            // phrase, then intersect with the surviving candidates.
            BitVec_Clear_All(scratch_vec);
            while (remaining > 0) {
                remaining--;
                int32_t shifted = I32Arr_Get(positions, remaining) - term_tick;
                if (shifted >= 0) {
                    BitVec_Set(scratch_vec, shifted);
                }
            }
            BitVec_And(starts_vec, scratch_vec);
        }
    }

    // Proceed only if all terms are present.
    const uint32_t num_tvs = VA_Get_Size(term_vectors);
    if (num_tvs == num_terms) {
        TermVector *first_tv = (TermVector*)VA_Fetch(term_vectors, 0);
        TermVector *last_tv
            = (TermVector*)VA_Fetch(term_vectors, num_tvs - 1);
        I32Array *first_positions = TV_Get_Positions(first_tv);
        I32Array *last_positions  = TV_Get_Positions(last_tv);
        I32Array *first_starts    = TV_Get_Start_Offsets(first_tv);
        I32Array *last_ends       = TV_Get_End_Offsets(last_tv);
        const uint32_t phrase_reach = num_terms - 1;
        I32Array *valid_posits      = BitVec_To_Array(starts_vec);
        const uint32_t num_valid    = I32Arr_Get_Size(valid_posits);
        const float    weight       = PhraseCompiler_Get_Weight(self);
        const uint32_t num_first    = I32Arr_Get_Size(first_positions);
        const uint32_t num_last     = I32Arr_Get_Size(last_positions);

        // Both cursors persist across valid positions: each search resumes
        // just past where the previous one stopped.
        uint32_t start_tick = 0;
        uint32_t end_tick   = 0;

        // Add only those starts/ends that belong to a valid position.
        for (uint32_t valid_tick = 0; valid_tick < num_valid; valid_tick++) {
            const int32_t want_start = I32Arr_Get(valid_posits, valid_tick);
            const int32_t want_end   = want_start + phrase_reach;
            int32_t start_offset = 0;
            int32_t end_offset   = 0;

            while (start_tick < num_first) {
                if (I32Arr_Get(first_positions, start_tick) == want_start) {
                    start_offset = I32Arr_Get(first_starts, start_tick);
                    break;
                }
                start_tick++;
            }

            while (end_tick < num_last) {
                if (I32Arr_Get(last_positions, end_tick) == want_end) {
                    end_offset = I32Arr_Get(last_ends, end_tick);
                    break;
                }
                end_tick++;
            }

            VA_Push(spans, (Obj*)Span_new(start_offset,
                                          end_offset - start_offset,
                                          weight));

            start_tick++;
            end_tick++;
        }

        DECREF(valid_posits);
    }

    DECREF(scratch_vec);
    DECREF(starts_vec);
    DECREF(term_vectors);
    return spans;
}