Beispiel #1
0
/* Look up the InverterEntry associated with `field`, creating it and caching
 * it in ivars->entry_pool the first time the field is seen.
 *
 * A field number of 0 from the Segment is treated as "not registered yet".
 * In that case the field is added to the Segment, provided the Schema has a
 * type for it; otherwise an error is thrown.
 */
static InverterEntry*
S_fetch_entry(InverterIVARS *ivars, CharBuf *field) {
    Schema *const schema = ivars->schema;
    int32_t num = Seg_Field_Num(ivars->segment, field);

    if (num == 0) {
        if (!Schema_Fetch_Type(schema, field)) {
            // Neither the Segment nor the Schema knows this field, so the
            // user must not have spec'd it.
            THROW(ERR, "Unknown field name: '%o'", field);
        }
        else {
            // The Schema knows the field; have the Segment assign a number.
            num = Seg_Add_Field(ivars->segment, field);
        }
    }

    InverterEntry *cached
        = (InverterEntry*)VA_Fetch(ivars->entry_pool, num);
    if (cached) {
        return cached;
    }

    // First request for this field: build the entry and remember it.
    InverterEntry *fresh = InvEntry_new(schema, (CharBuf*)field, num);
    VA_Store(ivars->entry_pool, num, (Obj*)fresh);
    return fresh;
}
Beispiel #2
0
/* Combine the weights of the supplied spans onto the array produced by
 * S_flattened_but_empty_spans(), then drop every flattened span whose
 * accumulated weight is zero.
 *
 * Returns a new empty array when `spans` is empty.
 * NOTE(review): the matching logic relies on the flattened spans being
 * ordered by offset and on `spans` being sorted the same way -- confirm
 * against S_flattened_but_empty_spans() and the callers.
 */
VArray*
HeatMap_Flatten_Spans_IMP(HeatMap *self, VArray *spans) {
    const uint32_t span_count = VA_Get_Size(spans);
    UNUSED_VAR(self);

    if (span_count == 0) {
        return VA_new(0);
    }

    VArray *flattened = S_flattened_but_empty_spans(spans);
    const uint32_t flat_count = VA_Get_Size(flattened);

    // Walk the source spans, adding each one's weight to every flattened
    // span between its start offset and its end offset.
    uint32_t cursor = 0;
    for (uint32_t src_tick = 0; src_tick < span_count; src_tick++) {
        Span *source = (Span*)VA_Fetch(spans, src_tick);
        int32_t src_start = Span_Get_Offset(source);
        int32_t src_len   = Span_Get_Length(source);
        int32_t src_end   = src_start + src_len;

        // Advance the cursor to the flattened span that begins where the
        // source span begins.  The cursor never moves backwards.
        while (cursor < flat_count) {
            Span *candidate = (Span*)VA_Fetch(flattened, cursor);
            if (Span_Get_Offset(candidate) == src_start) {
                break;
            }
            cursor++;
        }

        // Contribute the source weight until a flattened span starts at
        // the source span's end.
        uint32_t fill = cursor;
        while (fill < flat_count) {
            Span *dest = (Span*)VA_Fetch(flattened, fill);
            if (Span_Get_Offset(dest) == src_end) {
                break;
            }
            float summed = Span_Get_Weight(dest) + Span_Get_Weight(source);
            Span_Set_Weight(dest, summed);
            fill++;
        }
    }

    // Compact the array in place: keep only the spans that picked up a
    // score, then cut off the leftover tail.
    uint32_t kept = 0;
    uint32_t scan = 0;
    while (scan < flat_count) {
        Span *span = (Span*)VA_Fetch(flattened, scan);
        if (Span_Get_Weight(span)) {
            VA_Store(flattened, kept++, INCREF(span));
        }
        scan++;
    }
    VA_Excise(flattened, kept, flat_count - kept);

    return flattened;
}
Beispiel #3
0
/* Build a new VArray of the same size as `self` in which every non-NULL
 * element has been replaced by the result of Obj_Dump() on it.  Empty slots
 * in `self` stay empty in the result.
 */
VArray*
VA_dump(VArray *self)
{
    const u32_t count = self->size;
    VArray *result = VA_new(count);
    u32_t tick = 0;

    while (tick < count) {
        Obj *elem = VA_Fetch(self, tick);
        if (elem) {
            VA_Store(result, tick, Obj_Dump(elem));
        }
        tick++;
    }

    return result;
}
Beispiel #4
0
/* (Re)initialize a PolyReader's per-segment state from `sub_readers`.
 *
 * Replaces ivars->sub_readers and ivars->offsets, recomputes ivars->doc_max
 * as the sum of each SegReader's doc max, stores one aggregator per
 * component API into ivars->components (when the component supplies one),
 * and finally refreshes ivars->del_count from the DeletionsReader component,
 * if present.
 */
static void
S_init_sub_readers(PolyReader *self, VArray *sub_readers) {
    PolyReaderIVARS *const ivars = PolyReader_IVARS(self);
    uint32_t  num_sub_readers = VA_Get_Size(sub_readers);
    // One starting doc offset per sub reader; handed to I32Arr_new_steal()
    // below (presumably transferring ownership of the buffer -- confirm).
    int32_t *starts = (int32_t*)MALLOCATE(num_sub_readers * sizeof(int32_t));
    // Temporary map: component API name => VArray of that component's
    // DataReaders, indexed by sub reader tick.
    Hash  *data_readers = Hash_new(0);

    // Release the previous state before installing the new sub readers.
    DECREF(ivars->sub_readers);
    DECREF(ivars->offsets);
    ivars->sub_readers       = (VArray*)INCREF(sub_readers);

    // Accumulate doc_max, subreader start offsets, and DataReaders.
    ivars->doc_max = 0;
    for (uint32_t i = 0; i < num_sub_readers; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(sub_readers, i);
        Hash *components = SegReader_Get_Components(seg_reader);
        CharBuf *api;
        DataReader *component;
        // This segment starts where the running document total currently
        // stands.
        starts[i] = ivars->doc_max;
        ivars->doc_max += SegReader_Doc_Max(seg_reader);
        Hash_Iterate(components);
        while (Hash_Next(components, (Obj**)&api, (Obj**)&component)) {
            VArray *readers = (VArray*)Hash_Fetch(data_readers, (Obj*)api);
            if (!readers) {
                // First time this API is seen: create its per-segment array.
                readers = VA_new(num_sub_readers);
                Hash_Store(data_readers, (Obj*)api, (Obj*)readers);
            }
            // Slot i corresponds to sub reader i; segments lacking this
            // component leave their slot empty.
            VA_Store(readers, i, INCREF(component));
        }
    }
    ivars->offsets = I32Arr_new_steal(starts, num_sub_readers);

    // For each component API, ask the first available DataReader to build an
    // aggregator spanning all segments, and register it if one is returned.
    CharBuf *api;
    VArray  *readers;
    Hash_Iterate(data_readers);
    while (Hash_Next(data_readers, (Obj**)&api, (Obj**)&readers)) {
        DataReader *datareader
            = (DataReader*)CERTIFY(S_first_non_null(readers), DATAREADER);
        DataReader *aggregator
            = DataReader_Aggregator(datareader, readers, ivars->offsets);
        if (aggregator) {
            CERTIFY(aggregator, DATAREADER);
            Hash_Store(ivars->components, (Obj*)api, (Obj*)aggregator);
        }
    }
    DECREF(data_readers);

    // Cache the deletion count; it is 0 when no DeletionsReader component
    // was registered.
    DeletionsReader *del_reader
        = (DeletionsReader*)Hash_Fetch(
              ivars->components, (Obj*)VTable_Get_Name(DELETIONSREADER));
    ivars->del_count = del_reader ? DelReader_Del_Count(del_reader) : 0;
}
Beispiel #5
0
/* Test helper: wrap `value` in a one-element array, encode the array to
 * JSON, decode it again, and check that the decoded structure equals the
 * original array.
 */
static void
S_round_trip_integer(TestBatch *batch, int64_t value) {
    Integer64 *boxed   = Int64_new(value);
    VArray    *wrapper = VA_new(1);

    // The boxed integer is never DECREF'd directly here; it is released
    // together with the wrapper array.
    VA_Store(wrapper, 0, (Obj*)boxed);

    CharBuf *encoded = Json_to_json((Obj*)wrapper);
    Obj     *decoded = Json_from_json(encoded);

    TEST_TRUE(batch, VA_Equals(wrapper, decoded), "Round trip integer %ld",
              (long)value);

    DECREF(decoded);
    DECREF(encoded);
    DECREF(wrapper);
}
Beispiel #6
0
/* Choose which segments should be recycled (merged away).
 *
 * @param self       The IndexManager; consulted via IxManager_Choose_Sparse.
 * @param reader     PolyReader whose SegReaders are examined.
 * @param del_writer Supplies the per-segment deletion counts.
 * @param cutoff     Passed to S_check_cutoff to filter the SegReaders.
 * @param optimize   When true, every candidate that passes the cutoff filter
 *                   is returned without further selection.
 * @return A VArray of SegReaders to recycle.
 *
 * Without `optimize`, the candidates are sorted with S_compare_doc_count,
 * the leading run chosen by IxManager_Choose_Sparse is recycled, and each
 * remaining candidate is recycled only if at least 10% of its docs have been
 * deleted.
 */
VArray*
IxManager_recycle(IndexManager *self, PolyReader *reader,
                  DeletionsWriter *del_writer, int64_t cutoff, bool_t optimize)
{
    VArray *seg_readers = PolyReader_Get_Seg_Readers(reader);
    VArray *candidates  = VA_Gather(seg_readers, S_check_cutoff, &cutoff);

    // Optimizing recycles everything, so return before allocating any of
    // the bookkeeping needed for the selective path.
    if (optimize) {
        return candidates;
    }

    const uint32_t num_candidates = VA_Get_Size(candidates);
    VArray *recyclables = VA_new(num_candidates);

    // Sort by ascending size in docs, choose sparsely populated segments.
    VA_Sort(candidates, S_compare_doc_count, NULL);
    int32_t *counts = (int32_t*)MALLOCATE(num_candidates * sizeof(int32_t));
    for (uint32_t i = 0; i < num_candidates; i++) {
        SegReader *seg_reader = (SegReader*)CERTIFY(
            VA_Fetch(candidates, i), SEGREADER);
        counts[i] = SegReader_Doc_Count(seg_reader);
    }
    I32Array *doc_counts = I32Arr_new_steal(counts, num_candidates);
    uint32_t threshold = IxManager_Choose_Sparse(self, doc_counts);
    DECREF(doc_counts);

    // Move SegReaders to be recycled.  VA_Delete hands back the element
    // removed from `candidates`, which is then stored in `recyclables`.
    for (uint32_t i = 0; i < threshold; i++) {
        VA_Store(recyclables, i, VA_Delete(candidates, i));
    }

    // Find segments where at least 10% of all docs have been deleted.
    for (uint32_t i = threshold; i < num_candidates; i++) {
        SegReader *seg_reader = (SegReader*)VA_Delete(candidates, i);
        CharBuf   *seg_name   = SegReader_Get_Seg_Name(seg_reader);
        // Use doubles so the proportion is a floating point division.
        double doc_max = SegReader_Doc_Max(seg_reader);
        double num_deletions = DelWriter_Seg_Del_Count(del_writer, seg_name);
        double del_proportion = num_deletions / doc_max;
        if (del_proportion >= 0.1) {
            VA_Push(recyclables, (Obj*)seg_reader);
        }
        else {
            // Not recycled: drop the reference taken out of `candidates`.
            DECREF(seg_reader);
        }
    }

    DECREF(candidates);
    return recyclables;
}
Beispiel #7
0
/* Test helper: wrap `value` in a one-element array, round-trip the array
 * through JSON, and check that the decoded number differs from the original
 * by no more than `max_diff`.
 */
static void
S_round_trip_float(TestBatch *batch, double value, double max_diff) {
    Float64 *boxed   = Float64_new(value);
    VArray  *wrapper = VA_new(1);
    VA_Store(wrapper, 0, (Obj*)boxed);

    CharBuf *encoded = Json_to_json((Obj*)wrapper);
    Obj     *decoded = CERTIFY(Json_from_json(encoded), VARRAY);
    Float64 *result
        = (Float64*)CERTIFY(VA_Fetch((VArray*)decoded, 0), FLOAT64);

    // Absolute difference between what went in and what came back.
    double delta = Float64_Get_Value(boxed) - Float64_Get_Value(result);
    double magnitude = delta < 0 ? 0 - delta : delta;

    TEST_TRUE(batch, magnitude <= max_diff, "Round trip float %f", value);

    DECREF(decoded);
    DECREF(encoded);
    DECREF(wrapper);
}
Beispiel #8
0
/* Empty the queue into a new VArray.  Elements are written from the last
 * slot towards the first, so the first element popped ends up at the end of
 * the returned array.
 */
VArray*
PriQ_Pop_All_IMP(PriorityQueue *self) {
    PriorityQueueIVARS *const ivars = PriQ_IVARS(self);
    uint32_t slot = ivars->size;
    VArray *popped = VA_new(ivars->size);

    // `slot` is captured up front; the loop does not re-read ivars->size.
    while (slot != 0) {
        slot--;
        Obj *const elem = PriQ_Pop(self);
        VA_Store(popped, slot, elem);
    }

    return popped;
}
Beispiel #9
0
/* Rebuild a VArray from `dump`, which must be a VARRAY.  Each non-NULL
 * element of the dump is passed through Obj_Load() and stored at the same
 * index in the result; empty slots stay empty.  `self` is not used.
 */
VArray*
VA_load(VArray *self, Obj *dump)
{
    VArray *source = (VArray*)ASSERT_IS_A(dump, VARRAY);
    const u32_t count = source->size;
    VArray *loaded = VA_new(count);
    u32_t tick;
    UNUSED_VAR(self);

    for (tick = 0; tick < count; tick++) {
        Obj *elem_dump = VA_Fetch(source, tick);
        if (!elem_dump) {
            continue;
        }
        VA_Store(loaded, tick, Obj_Load(elem_dump, elem_dump));
    }

    return loaded;
}
Beispiel #10
0
/* Take over the active arenas and allocation cursor of `other`.
 *
 * Throws if `self` already has a current buffer.  Arenas 0..other->tick are
 * shifted off the front of other->arenas and stored at the same index in
 * self->arenas, then the tick/buf/last_buf/limit fields are copied across.
 */
void
MemPool_eat(MemoryPool *self, MemoryPool *other) {
    i32_t slot = 0;

    if (self->buf != NULL) {
        THROW("Memory pool is not empty");
    }

    /* Move active arenas from other to self. */
    while (slot <= other->tick) {
        ByteBuf *arena = (ByteBuf*)VA_Shift(other->arenas);
        /* Storing may displace an arena already sitting in this slot. */
        VA_Store(self->arenas, slot, (Obj*)arena);
        slot++;
    }

    /* Adopt the other pool's allocation state. */
    self->limit    = other->limit;
    self->buf      = other->buf;
    self->last_buf = other->last_buf;
    self->tick     = other->tick;
}
Beispiel #11
0
/* Return the SortFieldWriter for `field_num`, creating it on first request
 * and caching it in ivars->field_writers.
 *
 * The three shared temp output streams (ord, ix, dat) are opened the first
 * time any field writer is needed; a failure to open one of them rethrows
 * the error reported by Err_get_error().
 */
static SortFieldWriter*
S_lazy_init_field_writer(SortWriter *self, int32_t field_num) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);

    SortFieldWriter *existing
        = (SortFieldWriter*)VA_Fetch(ivars->field_writers, field_num);
    if (existing) {
        return existing;
    }

    // Open temp files.  temp_ord_out doubles as the "already opened" flag.
    if (!ivars->temp_ord_out) {
        Folder  *folder   = ivars->folder;
        CharBuf *seg_name = Seg_Get_Name(ivars->segment);
        CharBuf *path     = CB_newf("%o/sort_ord_temp", seg_name);

        ivars->temp_ord_out = Folder_Open_Out(folder, path);
        if (!ivars->temp_ord_out) {
            DECREF(path);
            RETHROW(INCREF(Err_get_error()));
        }

        // The same path buffer is reused for the remaining two files.
        CB_setf(path, "%o/sort_ix_temp", seg_name);
        ivars->temp_ix_out = Folder_Open_Out(folder, path);
        if (!ivars->temp_ix_out) {
            DECREF(path);
            RETHROW(INCREF(Err_get_error()));
        }

        CB_setf(path, "%o/sort_dat_temp", seg_name);
        ivars->temp_dat_out = Folder_Open_Out(folder, path);
        if (!ivars->temp_dat_out) {
            DECREF(path);
            RETHROW(INCREF(Err_get_error()));
        }

        DECREF(path);
    }

    // Build the writer for this field and remember it for later calls.
    CharBuf *field_name = Seg_Field_Name(ivars->segment, field_num);
    SortFieldWriter *created
        = SortFieldWriter_new(ivars->schema, ivars->snapshot, ivars->segment,
                              ivars->polyreader, field_name, ivars->mem_pool,
                              ivars->mem_thresh, ivars->temp_ord_out,
                              ivars->temp_ix_out, ivars->temp_dat_out);
    VA_Store(ivars->field_writers, field_num, (Obj*)created);
    return created;
}
Beispiel #12
0
/* Initialize a DefaultDeletionsWriter.
 *
 * After the DataWriter base init, this records the PolyReader's segment
 * readers and offsets, creates an IndexSearcher over the PolyReader, and for
 * every segment builds a BitVector holding its existing deletions plus a
 * segment-name => tick entry in name_to_tick.
 */
DefaultDeletionsWriter*
DefDelWriter_init(DefaultDeletionsWriter *self, Schema *schema,
                  Snapshot *snapshot, Segment *segment,
                  PolyReader *polyreader) {

    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);

    ivars->seg_readers = PolyReader_Seg_Readers(polyreader);
    uint32_t seg_count = VA_Get_Size(ivars->seg_readers);
    ivars->seg_starts   = PolyReader_Offsets(polyreader);
    ivars->bit_vecs     = VA_new(seg_count);
    // One "updated" flag per segment, all initially cleared by CALLOCATE.
    ivars->updated      = (bool*)CALLOCATE(seg_count, sizeof(bool));
    ivars->searcher     = IxSearcher_new((Obj*)polyreader);
    ivars->name_to_tick = Hash_new(seg_count);

    // Materialize a BitVector of deletions for each segment.
    for (uint32_t tick = 0; tick < seg_count; tick++) {
        SegReader *seg_reader
            = (SegReader*)VA_Fetch(ivars->seg_readers, tick);
        BitVector *deleted = BitVec_new(SegReader_Doc_Max(seg_reader));
        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Fetch(
                  seg_reader, Class_Get_Name(DELETIONSREADER));

        // A segment without a DeletionsReader has no iterator to drain.
        Matcher *seg_dels = NULL;
        if (del_reader) {
            seg_dels = DelReader_Iterator(del_reader);
        }

        if (seg_dels) {
            // Matcher_Next yields 0 once the iterator is exhausted.
            for (int32_t del = Matcher_Next(seg_dels);
                 del != 0;
                 del = Matcher_Next(seg_dels)) {
                BitVec_Set(deleted, del);
            }
            DECREF(seg_dels);
        }

        VA_Store(ivars->bit_vecs, tick, (Obj*)deleted);
        Hash_Store(ivars->name_to_tick,
                   (Obj*)SegReader_Get_Seg_Name(seg_reader),
                   (Obj*)Int32_new(tick));
    }

    return self;
}
Beispiel #13
0
/* Initialize a DefaultLexiconReader: run the LexiconReader base init, then
 * create a SegLexicon for every field number from 1 through the Schema's
 * field count for which the Segment has a name and S_has_data() reports
 * data.  Lexicons are stored at the index of their field number.
 */
DefaultLexiconReader*
DefLexReader_init(DefaultLexiconReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, VArray *segments, int32_t seg_tick) {

    // Init.
    LexReader_init((LexiconReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    DefaultLexiconReaderIVARS *const ivars = DefLexReader_IVARS(self);
    Segment *segment = DefLexReader_Get_Segment(self);

    // Build an array of SegLexicon objects.
    ivars->lexicons = VA_new(Schema_Num_Fields(schema));
    const uint32_t limit = Schema_Num_Fields(schema) + 1;
    uint32_t field_num = 1;  // the loop skips field number 0
    while (field_num < limit) {
        String *field = Seg_Field_Name(segment, field_num);
        if (field && S_has_data(schema, folder, segment, field)) {
            SegLexicon *lexicon = SegLex_new(schema, folder, segment, field);
            VA_Store(ivars->lexicons, field_num, (Obj*)lexicon);
        }
        field_num++;
    }

    return self;
}
/* Initialize a DefaultLexiconReader: run the LexiconReader base init, then
 * create a SegLexicon for every field number from 1 through the Schema's
 * field count for which the Segment has a name and S_has_data() reports
 * data.  Lexicons are stored at the index of their field number.
 */
DefaultLexiconReader*
DefLexReader_init(DefaultLexiconReader *self, Schema *schema, Folder *folder,
               Snapshot *snapshot, VArray *segments, i32_t seg_tick)
{
    Segment *segment;
    u32_t    field_num;
    u32_t    limit;

    /* Init. */
    LexReader_init((LexiconReader*)self, schema, folder, snapshot, segments,
        seg_tick);
    segment = DefLexReader_Get_Segment(self);

    /* Build an array of SegLexicon objects. */
    self->lexicons = VA_new(Schema_Num_Fields(schema));
    limit = Schema_Num_Fields(schema) + 1;
    field_num = 1;  /* the loop skips field number 0 */
    while (field_num < limit) {
        CharBuf *field = Seg_Field_Name(segment, field_num);
        if (field && S_has_data(schema, folder, segment, field)) {
            SegLexicon *lexicon = SegLex_new(schema, folder, segment, field);
            VA_Store(self->lexicons, field_num, (Obj*)lexicon);
        }
        field_num++;
    }

    return self;
}
Beispiel #15
0
/* Install `negated_query` as child 0 of this NOTQuery.  A new reference to
 * the query is taken before it is stored.
 */
void
NOTQuery_set_negated_query(NOTQuery *self, Query *negated_query)
{
    VArray *kids = self->children;
    VA_Store(kids, 0, INCREF(negated_query));
}
Beispiel #16
0
/* Install `negated_query` as child 0 of this NOTQuery.  A new reference to
 * the query is taken before it is stored.
 */
void
NOTQuery_set_negated_query(NOTQuery *self, Query *negated_query) {
    VArray *kids = NOTQuery_IVARS(self)->children;
    VA_Store(kids, 0, INCREF(negated_query));
}
Beispiel #17
0
/* Collect one matching document.
 *
 * Every call increments total_hits.  If SI_competitive() accepts the doc,
 * the spare MatchDoc held in ivars->bumped is filled in (doc id, optionally
 * score and sort values) and offered to the hit queue via HitQ_Jostle().
 * Whatever the queue hands back becomes the next spare; when nothing comes
 * back, a fresh spare MatchDoc is created.
 */
void
SortColl_collect(SortCollector *self, int32_t doc_id) {
    SortCollectorIVARS *const ivars = SortColl_IVARS(self);

    // Add to the total number of hits.
    ivars->total_hits++;

    // Collect this hit if it's competitive.
    if (SI_competitive(ivars, doc_id)) {
        // Reuse the spare MatchDoc rather than allocating a new one.
        MatchDoc *const match_doc = ivars->bumped;
        MatchDocIVARS *const match_doc_ivars = MatchDoc_IVARS(match_doc);
        // doc_id is offset by ivars->base before being stored.
        match_doc_ivars->doc_id = doc_id + ivars->base;

        // F32_NEGINF marks a score that has not been filled in yet (see the
        // recycling code below); only then is the matcher asked for it.
        if (ivars->need_score && match_doc_ivars->score == F32_NEGINF) {
            match_doc_ivars->score = Matcher_Score(ivars->matcher);
        }

        // Fetch values so that cross-segment sorting can work.
        if (ivars->need_values) {
            VArray *values = match_doc_ivars->values;

            for (uint32_t i = 0, max = ivars->num_rules; i < max; i++) {
                SortCache *cache   = ivars->sort_caches[i];
                // Take the previous value out of the array so it can be
                // reused as the blank object passed to SortCache_Value().
                Obj       *old_val = (Obj*)VA_Delete(values, i);
                if (cache) {
                    int32_t ord = SortCache_Ordinal(cache, doc_id);
                    Obj *blank = old_val
                                 ? old_val
                                 : SortCache_Make_Blank(cache);
                    Obj *val = SortCache_Value(cache, ord, blank);
                    // When no value comes back the slot stays empty and the
                    // blank is released.
                    if (val) { VA_Store(values, i, (Obj*)val); }
                    else     { DECREF(blank); }
                }
            }
        }

        // Insert the new MatchDoc.
        ivars->bumped = (MatchDoc*)HitQ_Jostle(ivars->hit_q, (Obj*)match_doc);

        if (ivars->bumped) {
            if (ivars->bumped == match_doc) {
                /* The queue is full, and we have established a threshold for
                 * this segment as to what sort of document is definitely not
                 * acceptable.  Turn off AUTO_ACCEPT and start actually
                 * testing whether hits are competitive. */
                ivars->bubble_score  = match_doc_ivars->score;
                ivars->bubble_doc    = doc_id;
                ivars->actions       = ivars->derived_actions;
            }

            // Recycle.  Reset the score to the "not yet computed" sentinel
            // (or NaN when scores are not needed).
            MatchDoc_IVARS(ivars->bumped)->score = ivars->need_score
                                                   ? F32_NEGINF
                                                   : F32_NAN;
        }
        else {
            // The queue isn't full yet, so create a fresh MatchDoc.
            VArray *values = ivars->need_values
                             ? VA_new(ivars->num_rules)
                             : NULL;
            float fake_score = ivars->need_score ? F32_NEGINF : F32_NAN;
            ivars->bumped = MatchDoc_new(INT32_MAX, fake_score, values);
            // Drop the local reference to `values` (NOTE(review): presumably
            // MatchDoc_new keeps its own reference -- confirm).
            DECREF(values);
        }

    }
}