Exemple #1
0
static void
test_Set_and_Get(TestBatch *batch) {
    unsigned i, max;
    const uint32_t  three     = 3;
    const uint32_t  seventeen = 17;
    BitVector      *bit_vec   = BitVec_new(8);

    BitVec_Set(bit_vec, three);
    TEST_TRUE(batch, BitVec_Get_Capacity(bit_vec) < seventeen,
              "set below cap");
    BitVec_Set(bit_vec, seventeen);
    TEST_TRUE(batch, BitVec_Get_Capacity(bit_vec) > seventeen,
              "set above cap causes BitVector to grow");

    for (i = 0, max = BitVec_Get_Capacity(bit_vec); i < max; i++) {
        if (i == three || i == seventeen) {
            TEST_TRUE(batch, BitVec_Get(bit_vec, i), "set/get %d", i);
        }
        else {
            TEST_FALSE(batch, BitVec_Get(bit_vec, i), "get %d", i);
        }
    }
    TEST_FALSE(batch, BitVec_Get(bit_vec, i), "out of range get");

    DECREF(bit_vec);
}
Exemple #2
0
static void
test_Set_and_Get(TestBatchRunner *runner) {
    const size_t  three     = 3;
    const size_t  seventeen = 17;
    BitVector    *bit_vec   = BitVec_new(8);

    BitVec_Set(bit_vec, three);
    TEST_TRUE(runner, BitVec_Get_Capacity(bit_vec) < seventeen,
              "set below cap");
    BitVec_Set(bit_vec, seventeen);
    TEST_TRUE(runner, BitVec_Get_Capacity(bit_vec) > seventeen,
              "set above cap causes BitVector to grow");

    size_t i, max;
    for (i = 0, max = BitVec_Get_Capacity(bit_vec); i < max; i++) {
        if (i == three || i == seventeen) {
            TEST_TRUE(runner, BitVec_Get(bit_vec, i), "set/get %u", (unsigned)i);
        }
        else {
            TEST_FALSE(runner, BitVec_Get(bit_vec, i), "get %u", (unsigned)i);
        }
    }
    TEST_FALSE(runner, BitVec_Get(bit_vec, i), "out of range get");

    DECREF(bit_vec);
}
Exemple #3
0
static void
test_Mimic(TestBatch *batch) {
    int foo;

    for (foo = 0; foo <= 17; foo++) {
        int bar;
        for (bar = 0; bar <= 17; bar++) {
            int i;
            BitVector *foo_vec = BitVec_new(0);
            BitVector *bar_vec = BitVec_new(0);

            BitVec_Set(foo_vec, foo);
            BitVec_Set(bar_vec, bar);
            BitVec_Mimic(foo_vec, (Obj*)bar_vec);

            for (i = 0; i <= 17; i++) {
                if (BitVec_Get(foo_vec, i) && i != bar) { break; }
            }
            TEST_INT_EQ(batch, i, 18, "Mimic(%d, %d)", foo, bar);

            DECREF(foo_vec);
            DECREF(bar_vec);
        }
    }
}
Exemple #4
0
static void
test_To_Array(TestBatch *batch) {
    uint64_t  *source_ints = TestUtils_random_u64s(NULL, 20, 0, 200);
    BitVector *bit_vec = BitVec_new(0);
    I32Array  *array;
    long       num_unique = 0;
    long       i;

    // Unique the random ints.
    Sort_quicksort(source_ints, 20, sizeof(uint64_t),
                   S_compare_u64s, NULL);
    for (i = 0; i < 19; i++) {
        if (source_ints[i] != source_ints[i + 1]) {
            source_ints[num_unique] = source_ints[i];
            num_unique++;
        }
    }

    // Set bits.
    for (i = 0; i < num_unique; i++) {
        BitVec_Set(bit_vec, (uint32_t)source_ints[i]);
    }

    // Create the array and compare it to the source.
    array = BitVec_To_Array(bit_vec);
    for (i = 0; i < num_unique; i++) {
        if (I32Arr_Get(array, i) != (int32_t)source_ints[i]) { break; }
    }
    TEST_INT_EQ(batch, i, num_unique, "To_Array (%ld == %ld)", i,
                num_unique);

    DECREF(array);
    DECREF(bit_vec);
    FREEMEM(source_ints);
}
Exemple #5
0
static void
test_To_Array(TestBatchRunner *runner) {
    uint64_t  *source_ints = TestUtils_random_u64s(NULL, 20, 0, 200);
    BitVector *bit_vec = BitVec_new(0);
    I32Array  *array;
    unsigned   num_unique = 0;

    // Unique the random ints.
    qsort(source_ints, 20, sizeof(uint64_t), S_compare_u64s);
    for (unsigned i = 0; i < 19; i++) {
        if (source_ints[i] != source_ints[i + 1]) {
            source_ints[num_unique] = source_ints[i];
            num_unique++;
        }
    }

    // Set bits.
    for (unsigned i = 0; i < num_unique; i++) {
        BitVec_Set(bit_vec, (size_t)source_ints[i]);
    }

    // Create the array and compare it to the source.
    array = BitVec_To_Array(bit_vec);
    unsigned i;
    for (i = 0; i < num_unique; i++) {
        if (I32Arr_Get(array, (size_t)i) != (int32_t)source_ints[i]) { break; }
    }
    TEST_UINT_EQ(runner, i, num_unique, "To_Array (%u == %u)", i,
                 num_unique);

    DECREF(array);
    DECREF(bit_vec);
    FREEMEM(source_ints);
}
void
DefDelWriter_Delete_By_Term_IMP(DefaultDeletionsWriter *self,
                                String *field, Obj *term) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    for (uint32_t i = 0, max = VA_Get_Size(ivars->seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(ivars->seg_readers, i);
        PostingListReader *plist_reader
            = (PostingListReader*)SegReader_Fetch(
                  seg_reader, Class_Get_Name(POSTINGLISTREADER));
        BitVector *bit_vec = (BitVector*)VA_Fetch(ivars->bit_vecs, i);
        PostingList *plist = plist_reader
                             ? PListReader_Posting_List(plist_reader, field, term)
                             : NULL;
        int32_t doc_id;
        int32_t num_zapped = 0;

        // Iterate through postings, marking each doc as deleted.
        if (plist) {
            while (0 != (doc_id = PList_Next(plist))) {
                num_zapped += !BitVec_Get(bit_vec, doc_id);
                BitVec_Set(bit_vec, doc_id);
            }
            if (num_zapped) { ivars->updated[i] = true; }
            DECREF(plist);
        }
    }
}
void
DefDelWriter_Delete_By_Query_IMP(DefaultDeletionsWriter *self, Query *query) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Compiler *compiler = Query_Make_Compiler(query, (Searcher*)ivars->searcher,
                                             Query_Get_Boost(query), false);

    for (uint32_t i = 0, max = VA_Get_Size(ivars->seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(ivars->seg_readers, i);
        BitVector *bit_vec = (BitVector*)VA_Fetch(ivars->bit_vecs, i);
        Matcher *matcher = Compiler_Make_Matcher(compiler, seg_reader, false);

        if (matcher) {
            int32_t doc_id;
            int32_t num_zapped = 0;

            // Iterate through matches, marking each doc as deleted.
            while (0 != (doc_id = Matcher_Next(matcher))) {
                num_zapped += !BitVec_Get(bit_vec, doc_id);
                BitVec_Set(bit_vec, doc_id);
            }
            if (num_zapped) { ivars->updated[i] = true; }

            DECREF(matcher);
        }
    }

    DECREF(compiler);
}
Exemple #8
0
static SeriesMatcher*
S_make_series_matcher(I32Array *doc_ids, I32Array *offsets, int32_t doc_max) {
    int32_t  num_doc_ids  = I32Arr_Get_Size(doc_ids);
    int32_t  num_matchers = I32Arr_Get_Size(offsets);
    VArray  *matchers     = VA_new(num_matchers);
    int32_t  tick         = 0;
    int32_t  i;

    // Divvy up doc_ids by segment into BitVectors.
    for (i = 0; i < num_matchers; i++) {
        int32_t offset = I32Arr_Get(offsets, i);
        int32_t max    = i == num_matchers - 1
                         ? doc_max + 1
                         : I32Arr_Get(offsets, i + 1);
        BitVector *bit_vec = BitVec_new(max - offset);
        while (tick < num_doc_ids) {
            int32_t doc_id = I32Arr_Get(doc_ids, tick);
            if (doc_id > max) { break; }
            else               { tick++; }
            BitVec_Set(bit_vec, doc_id - offset);
        }
        VA_Push(matchers, (Obj*)BitVecMatcher_new(bit_vec));
        DECREF(bit_vec);
    }

    SeriesMatcher *series_matcher = SeriesMatcher_new(matchers, offsets);
    DECREF(matchers);
    return series_matcher;
}
Exemple #9
0
// Valgrind only - detect off-by-one error.
static void
test_off_by_one_error() {
    for (unsigned cap = 5; cap <= 24; cap++) {
        BitVector *bit_vec = BitVec_new(cap);
        BitVec_Set(bit_vec, cap - 2);
        DECREF(bit_vec);
    }
}
Exemple #10
0
static void
test_Clone(TestBatchRunner *runner) {
    BitVector *self = BitVec_new(30);
    BitVector *twin;

    BitVec_Set(self, 2);
    BitVec_Set(self, 3);
    BitVec_Set(self, 10);
    BitVec_Set(self, 20);

    twin = BitVec_Clone(self);
    size_t i;
    for (i = 0; i < 50; i++) {
        if (BitVec_Get(self, i) != BitVec_Get(twin, i)) { break; }
    }
    TEST_UINT_EQ(runner, i, 50, "Clone");
    TEST_UINT_EQ(runner, BitVec_Count(twin), 4, "clone Count");

    DECREF(self);
    DECREF(twin);
}
Exemple #11
0
static void
test_Clone(TestBatch *batch) {
    int i;
    BitVector *self = BitVec_new(30);
    BitVector *twin;

    BitVec_Set(self, 2);
    BitVec_Set(self, 3);
    BitVec_Set(self, 10);
    BitVec_Set(self, 20);

    twin = BitVec_Clone(self);
    for (i = 0; i < 50; i++) {
        if (BitVec_Get(self, i) != BitVec_Get(twin, i)) { break; }
    }
    TEST_INT_EQ(batch, i, 50, "Clone");
    TEST_INT_EQ(batch, BitVec_Count(twin), 4, "clone Count");

    DECREF(self);
    DECREF(twin);
}
Exemple #12
0
static BitVector*
S_create_set(int set_num) {
    unsigned nums_1[] = { 1, 2, 3, 10, 20, 30, 0 };
    unsigned nums_2[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10,
                     25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 0
                   };
    unsigned *nums = set_num == 1 ? nums_1 : nums_2;
    BitVector *bit_vec = BitVec_new(31);
    for (unsigned i = 0; nums[i] != 0; i++) {
        BitVec_Set(bit_vec, nums[i]);
    }
    return bit_vec;
}
Exemple #13
0
static void
test_Mimic(TestBatchRunner *runner) {
    for (unsigned foo = 0; foo <= 17; foo++) {
        for (unsigned bar = 0; bar <= 17; bar++) {
            BitVector *foo_vec = BitVec_new(0);
            BitVector *bar_vec = BitVec_new(0);

            BitVec_Set(foo_vec, foo);
            BitVec_Set(bar_vec, bar);
            BitVec_Mimic(foo_vec, (Obj*)bar_vec);

            unsigned i;
            for (i = 0; i <= 17; i++) {
                if (BitVec_Get(foo_vec, i) && i != bar) { break; }
            }
            TEST_UINT_EQ(runner, i, 18, "Mimic(%u, %u)", foo, bar);

            DECREF(foo_vec);
            DECREF(bar_vec);
        }
    }
}
void
DefDelWriter_Delete_By_Doc_ID_IMP(DefaultDeletionsWriter *self, int32_t doc_id) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    uint32_t   sub_tick   = PolyReader_sub_tick(ivars->seg_starts, doc_id);
    BitVector *bit_vec    = (BitVector*)VA_Fetch(ivars->bit_vecs, sub_tick);
    uint32_t   offset     = I32Arr_Get(ivars->seg_starts, sub_tick);
    int32_t    seg_doc_id = doc_id - offset;

    if (!BitVec_Get(bit_vec, seg_doc_id)) {
        ivars->updated[sub_tick] = true;
        BitVec_Set(bit_vec, seg_doc_id);
    }
}
Exemple #15
0
static void
test_Next_Hit(TestBatchRunner *runner) {
    for (int i = 24; i <= 33; i++) {
        BitVector *bit_vec = BitVec_new(64);
        BitVec_Set(bit_vec, (size_t)i);
        TEST_INT_EQ(runner, BitVec_Next_Hit(bit_vec, 0), i,
                    "Next_Hit for 0 is %d", i);
        TEST_INT_EQ(runner, BitVec_Next_Hit(bit_vec, 0), i,
                    "Next_Hit for 1 is %d", i);
        for (int probe = 15; probe <= i; probe++) {
            TEST_INT_EQ(runner, BitVec_Next_Hit(bit_vec, (size_t)probe), i,
                        "Next_Hit for %d is %d", probe, i);
        }
        for (int probe = i + 1; probe <= i + 9; probe++) {
            TEST_INT_EQ(runner, BitVec_Next_Hit(bit_vec, (size_t)probe), -1,
                        "no Next_Hit for %d when max is %d", probe, i);
        }
        DECREF(bit_vec);
    }
}
DefaultDeletionsWriter*
DefDelWriter_init(DefaultDeletionsWriter *self, Schema *schema,
                  Snapshot *snapshot, Segment *segment,
                  PolyReader *polyreader) {

    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    ivars->seg_readers          = PolyReader_Seg_Readers(polyreader);
    uint32_t num_seg_readers    = VA_Get_Size(ivars->seg_readers);
    ivars->seg_starts           = PolyReader_Offsets(polyreader);
    ivars->bit_vecs             = VA_new(num_seg_readers);
    ivars->updated              = (bool*)CALLOCATE(num_seg_readers, sizeof(bool));
    ivars->searcher             = IxSearcher_new((Obj*)polyreader);
    ivars->name_to_tick         = Hash_new(num_seg_readers);

    // Materialize a BitVector of deletions for each segment.
    for (uint32_t i = 0; i < num_seg_readers; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(ivars->seg_readers, i);
        BitVector *bit_vec    = BitVec_new(SegReader_Doc_Max(seg_reader));
        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Fetch(
                  seg_reader, Class_Get_Name(DELETIONSREADER));
        Matcher *seg_dels = del_reader
                            ? DelReader_Iterator(del_reader)
                            : NULL;

        if (seg_dels) {
            int32_t del;
            while (0 != (del = Matcher_Next(seg_dels))) {
                BitVec_Set(bit_vec, del);
            }
            DECREF(seg_dels);
        }
        VA_Store(ivars->bit_vecs, i, (Obj*)bit_vec);
        Hash_Store(ivars->name_to_tick,
                   (Obj*)SegReader_Get_Seg_Name(seg_reader),
                   (Obj*)Int32_new(i));
    }

    return self;
}
Exemple #17
0
static void
test_Count(TestBatch *batch) {
    int i;
    int shuffled[64];
    BitVector *bit_vec = BitVec_new(64);

    for (i = 0; i < 64; i++) { shuffled[i] = i; }
    for (i = 0; i < 64; i++) {
        int shuffle_pos = rand() % 64;
        int temp = shuffled[shuffle_pos];
        shuffled[shuffle_pos] = shuffled[i];
        shuffled[i] = temp;
    }
    for (i = 0; i < 64; i++) {
        BitVec_Set(bit_vec, shuffled[i]);
        if (BitVec_Count(bit_vec) != (uint32_t)(i + 1)) { break; }
    }
    TEST_INT_EQ(batch, i, 64, "Count() returns the right number of bits");

    DECREF(bit_vec);
}
Exemple #18
0
static void
test_Count(TestBatchRunner *runner) {
    unsigned shuffled[64];
    BitVector *bit_vec = BitVec_new(64);

    for (unsigned i = 0; i < 64; i++) { shuffled[i] = i; }
    for (unsigned i = 0; i < 64; i++) {
        unsigned shuffle_pos = (unsigned)rand() % 64;
        unsigned temp = shuffled[shuffle_pos];
        shuffled[shuffle_pos] = shuffled[i];
        shuffled[i] = temp;
    }
    unsigned i;
    for (i = 0; i < 64; i++) {
        BitVec_Set(bit_vec, shuffled[i]);
        if (BitVec_Count(bit_vec) != (uint32_t)(i + 1)) { break; }
    }
    TEST_UINT_EQ(runner, i, 64, "Count() returns the right number of bits");

    DECREF(bit_vec);
}
Exemple #19
0
static void
test_Next_Hit(TestBatch *batch) {
    int i;

    for (i = 24; i <= 33; i++) {
        int probe;
        BitVector *bit_vec = BitVec_new(64);
        BitVec_Set(bit_vec, i);
        TEST_INT_EQ(batch, BitVec_Next_Hit(bit_vec, 0), i,
                    "Next_Hit for 0 is %d", i);
        TEST_INT_EQ(batch, BitVec_Next_Hit(bit_vec, 0), i,
                    "Next_Hit for 1 is %d", i);
        for (probe = 15; probe <= i; probe++) {
            TEST_INT_EQ(batch, BitVec_Next_Hit(bit_vec, probe), i,
                        "Next_Hit for %d is %d", probe, i);
        }
        for (probe = i + 1; probe <= i + 9; probe++) {
            TEST_INT_EQ(batch, BitVec_Next_Hit(bit_vec, probe), -1,
                        "no Next_Hit for %d when max is %d", probe, i);
        }
        DECREF(bit_vec);
    }
}
Exemple #20
0
VArray*
PhraseCompiler_highlight_spans(PhraseCompiler *self, Searcher *searcher, 
                               DocVector *doc_vec, const CharBuf *field)
{
    PhraseQuery *const parent = (PhraseQuery*)self->parent;
    VArray      *const terms  = parent->terms;
    VArray      *const spans  = VA_new(0);
    VArray      *term_vectors;
    BitVector   *posit_vec;
    BitVector   *other_posit_vec;
    uint32_t     i;
    const uint32_t  num_terms = VA_Get_Size(terms);
    uint32_t     num_tvs;
    UNUSED_VAR(searcher);

    // Bail if no terms or field doesn't match. 
    if (!num_terms) { return spans; }
    if (!CB_Equals(field, (Obj*)parent->field)) { return spans; }

    term_vectors    = VA_new(num_terms);
    posit_vec       = BitVec_new(0);
    other_posit_vec = BitVec_new(0);
    for (i = 0; i < num_terms; i++) {
        Obj *term = VA_Fetch(terms, i);
        TermVector *term_vector 
            = DocVec_Term_Vector(doc_vec, field, (CharBuf*)term);

        // Bail if any term is missing. 
        if (!term_vector)
            break;

        VA_Push(term_vectors, (Obj*)term_vector);

        if (i == 0) {
            // Set initial positions from first term. 
            uint32_t j;
            I32Array *positions = TV_Get_Positions(term_vector);
            for (j = I32Arr_Get_Size(positions); j > 0; j--) {
                BitVec_Set(posit_vec, I32Arr_Get(positions, j - 1));
            }
        }
        else {
            // Filter positions using logical "and". 
            uint32_t j;
            I32Array *positions = TV_Get_Positions(term_vector);

            BitVec_Clear_All(other_posit_vec);
            for (j = I32Arr_Get_Size(positions); j > 0; j--) {
                int32_t pos = I32Arr_Get(positions, j - 1) - i;
                if (pos >= 0) {
                    BitVec_Set(other_posit_vec, pos);
                }
            }
            BitVec_And(posit_vec, other_posit_vec);
        }
    }

    // Proceed only if all terms are present. 
    num_tvs = VA_Get_Size(term_vectors);
    if (num_tvs == num_terms) {
        TermVector *first_tv = (TermVector*)VA_Fetch(term_vectors, 0);
        TermVector *last_tv  
            = (TermVector*)VA_Fetch(term_vectors, num_tvs - 1);
        I32Array *tv_start_positions = TV_Get_Positions(first_tv);
        I32Array *tv_end_positions   = TV_Get_Positions(last_tv);
        I32Array *tv_start_offsets   = TV_Get_Start_Offsets(first_tv);
        I32Array *tv_end_offsets     = TV_Get_End_Offsets(last_tv);
        uint32_t  terms_max          = num_terms - 1;
        I32Array *valid_posits       = BitVec_To_Array(posit_vec);
        uint32_t  num_valid_posits   = I32Arr_Get_Size(valid_posits);
        uint32_t j = 0;
        uint32_t posit_tick;
        float weight = PhraseCompiler_Get_Weight(self);
        i = 0;

        // Add only those starts/ends that belong to a valid position. 
        for (posit_tick = 0; posit_tick < num_valid_posits; posit_tick++) {
            int32_t valid_start_posit = I32Arr_Get(valid_posits, posit_tick);
            int32_t valid_end_posit   = valid_start_posit + terms_max;
            int32_t start_offset = 0, end_offset = 0;
            uint32_t max;

            for (max = I32Arr_Get_Size(tv_start_positions); i < max; i++) {
                if (I32Arr_Get(tv_start_positions, i) == valid_start_posit) {
                    start_offset = I32Arr_Get(tv_start_offsets, i);
                    break;
                }
            }
            for (max = I32Arr_Get_Size(tv_end_positions); j < max; j++) {
                if (I32Arr_Get(tv_end_positions, j) == valid_end_posit) {
                    end_offset = I32Arr_Get(tv_end_offsets, j);
                    break;
                }
            }

            VA_Push(spans, (Obj*)Span_new(start_offset, 
                end_offset - start_offset, weight) );

            i++, j++;
        }

        DECREF(valid_posits);
    }

    DECREF(other_posit_vec);
    DECREF(posit_vec);
    DECREF(term_vectors);
    return spans;
}