Example #1
0
static void
S_compose_inner_queries(QueryParser *self, Vector *elems,
                        String *default_field) {
    const int32_t default_occur = QParser_IVARS(self)->default_occur;

    // Generate all queries.  Apply any fields.
    for (uint32_t i = Vec_Get_Size(elems); i--;) {
        String *field = default_field;
        ParserElem *elem = (ParserElem*)Vec_Fetch(elems, i);

        // Apply field.
        if (i > 0) {
            // Field specifier must immediately precede any query.
            ParserElem* maybe_field_elem
                = (ParserElem*)Vec_Fetch(elems, i - 1);
            if (ParserElem_Get_Type(maybe_field_elem) == TOKEN_FIELD) {
                field = (String*)ParserElem_As(maybe_field_elem, STRING);
            }
        }

        if (ParserElem_Get_Type(elem) == TOKEN_STRING) {
            String *text = (String*)ParserElem_As(elem, STRING);
            LeafQuery *query = LeafQuery_new(field, text);
            ParserElem *new_elem
                = ParserElem_new(TOKEN_QUERY, (Obj*)query);
            if (default_occur == MUST) {
                ParserElem_Require(new_elem);
            }
            Vec_Store(elems, i, (Obj*)new_elem);
        }
    }
}
Example #2
0
static InverterEntry*
S_fetch_entry(InverterIVARS *ivars, String *field) {
    Schema *const schema = ivars->schema;
    int32_t field_num = Seg_Field_Num(ivars->segment, field);
    if (!field_num) {
        // This field seems not to be in the segment yet.  Try to find it in
        // the Schema.
        if (Schema_Fetch_Type(schema, field)) {
            // The field is in the Schema.  Get a field num from the Segment.
            field_num = Seg_Add_Field(ivars->segment, field);
        }
        else {
            // We've truly failed to find the field.  The user must
            // not have spec'd it.
            THROW(ERR, "Unknown field name: '%o'", field);
        }
    }

    InverterEntry *entry
        = (InverterEntry*)Vec_Fetch(ivars->entry_pool, field_num);
    if (!entry) {
        entry = InvEntry_new(schema, (String*)field, field_num);
        Vec_Store(ivars->entry_pool, field_num, (Obj*)entry);
    }
    return entry;
}
Example #3
0
Vector*
HeatMap_Flatten_Spans_IMP(HeatMap *self, Vector *spans) {
    const size_t num_spans = Vec_Get_Size(spans);
    UNUSED_VAR(self);

    if (!num_spans) {
        return Vec_new(0);
    }
    else {
        Vector *flattened = S_flattened_but_empty_spans(spans);
        const size_t num_raw_flattened = Vec_Get_Size(flattened);

        // Iterate over each of the source spans, contributing their scores to
        // any destination span that falls within range.
        size_t dest_tick = 0;
        for (size_t i = 0; i < num_spans; i++) {
            Span *source_span = (Span*)Vec_Fetch(spans, i);
            int32_t source_span_offset = Span_Get_Offset(source_span);
            int32_t source_span_len    = Span_Get_Length(source_span);
            int32_t source_span_end    = source_span_offset + source_span_len;

            // Get the location of the flattened span that shares the source
            // span's offset.
            for (; dest_tick < num_raw_flattened; dest_tick++) {
                Span *dest_span = (Span*)Vec_Fetch(flattened, dest_tick);
                if (Span_Get_Offset(dest_span) == source_span_offset) {
                    break;
                }
            }

            // Fill in scores.
            for (size_t j = dest_tick; j < num_raw_flattened; j++) {
                Span *dest_span = (Span*)Vec_Fetch(flattened, j);
                if (Span_Get_Offset(dest_span) == source_span_end) {
                    break;
                }
                else {
                    float new_weight = Span_Get_Weight(dest_span)
                                       + Span_Get_Weight(source_span);
                    Span_Set_Weight(dest_span, new_weight);
                }
            }
        }

        // Leave holes instead of spans that don't have any score.
        dest_tick = 0;
        for (size_t i = 0; i < num_raw_flattened; i++) {
            Span *span = (Span*)Vec_Fetch(flattened, i);
            if (Span_Get_Weight(span)) {
                Vec_Store(flattened, dest_tick++, INCREF(span));
            }
        }
        Vec_Excise(flattened, dest_tick, num_raw_flattened - dest_tick);

        return flattened;
    }
}
Example #4
0
static void
S_parse_subqueries(QueryParser *self, Vector *elems) {
    const int32_t default_occur = QParser_IVARS(self)->default_occur;
    while (1) {
        // Work from the inside out, starting with the leftmost innermost
        // paren group.
        size_t left = SIZE_MAX;
        size_t right = SIZE_MAX;
        String *field = NULL;
        for (size_t i = 0, max = Vec_Get_Size(elems); i < max; i++) {
            ParserElem *elem = (ParserElem*)Vec_Fetch(elems, i);
            uint32_t type = ParserElem_Get_Type(elem);
            if (type == TOKEN_OPEN_PAREN) {
                left = i;
            }
            else if (type == TOKEN_CLOSE_PAREN) {
                right = i;
                break;
            }
            else if (type == TOKEN_FIELD && i < max - 1) {
                // If a field applies to an enclosing paren, pass it along.
                ParserElem *next_elem = (ParserElem*)Vec_Fetch(elems, i + 1);
                uint32_t next_type = ParserElem_Get_Type(next_elem);
                if (next_type == TOKEN_OPEN_PAREN) {
                    field = (String*)ParserElem_As(elem, STRING);
                }
            }
        }

        // Break out of loop when there are no parens left.
        if (right == SIZE_MAX) {
            break;
        }

        // Create the subquery.
        Vector *sub_elems = Vec_Slice(elems, left + 1, right - left - 1);
        Query *subquery = S_parse_subquery(self, sub_elems, field, true);
        ParserElem *new_elem = ParserElem_new(TOKEN_QUERY, (Obj*)subquery);
        if (default_occur == MUST) {
            ParserElem_Require(new_elem);
        }
        DECREF(sub_elems);

        // Replace the elements used to create the subquery with the subquery
        // itself.
        if (left > 0) {
            ParserElem *maybe_field = (ParserElem*)Vec_Fetch(elems, left - 1);
            uint32_t maybe_field_type = ParserElem_Get_Type(maybe_field);
            if (maybe_field_type == TOKEN_FIELD) {
                left -= 1;
            }
        }
        Vec_Excise(elems, left + 1, right - left);
        Vec_Store(elems, left, (Obj*)new_elem);
    }
}
Example #5
0
Vector*
IxManager_Recycle_IMP(IndexManager *self, PolyReader *reader,
                      DeletionsWriter *del_writer, int64_t cutoff,
                      bool optimize) {
    Vector *seg_readers = PolyReader_Get_Seg_Readers(reader);
    size_t num_seg_readers = Vec_Get_Size(seg_readers);
    SegReader **candidates
        = (SegReader**)MALLOCATE(num_seg_readers * sizeof(SegReader*));
    size_t num_candidates = 0;
    for (size_t i = 0; i < num_seg_readers; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(seg_readers, i);
        if (SegReader_Get_Seg_Num(seg_reader) > cutoff) {
            candidates[num_candidates++] = seg_reader;
        }
    }

    Vector *recyclables = Vec_new(num_candidates);

    if (optimize) {
        for (size_t i = 0; i < num_candidates; i++) {
            Vec_Push(recyclables, INCREF(candidates[i]));
        }
        FREEMEM(candidates);
        return recyclables;
    }

    // Sort by ascending size in docs, choose sparsely populated segments.
    qsort(candidates, num_candidates, sizeof(SegReader*), S_compare_doc_count);
    int32_t *counts = (int32_t*)MALLOCATE(num_candidates * sizeof(int32_t));
    for (uint32_t i = 0; i < num_candidates; i++) {
        counts[i] = SegReader_Doc_Count(candidates[i]);
    }
    I32Array *doc_counts = I32Arr_new_steal(counts, num_candidates);
    uint32_t threshold = IxManager_Choose_Sparse(self, doc_counts);
    DECREF(doc_counts);

    // Move SegReaders to be recycled.
    for (uint32_t i = 0; i < threshold; i++) {
        Vec_Store(recyclables, i, INCREF(candidates[i]));
    }

    // Find segments where at least 10% of all docs have been deleted.
    for (uint32_t i = threshold; i < num_candidates; i++) {
        SegReader *seg_reader = candidates[i];
        String    *seg_name   = SegReader_Get_Seg_Name(seg_reader);
        double doc_max = SegReader_Doc_Max(seg_reader);
        double num_deletions = DelWriter_Seg_Del_Count(del_writer, seg_name);
        double del_proportion = num_deletions / doc_max;
        if (del_proportion >= 0.1) {
            Vec_Push(recyclables, INCREF(seg_reader));
        }
    }

    FREEMEM(candidates);
    return recyclables;
}
Example #6
0
static Obj*
S_dump_array(Vector *array) {
    Vector *dump = Vec_new(Vec_Get_Size(array));
    for (size_t i = 0, max = Vec_Get_Size(array); i < max; i++) {
        Obj *elem = Vec_Fetch(array, i);
        if (elem) {
            Vec_Store(dump, i, Freezer_dump(elem));
        }
    }
    return (Obj*)dump;
}
Example #7
0
Obj*
S_load_from_array(Vector *dump) {
    Vector *loaded = Vec_new(Vec_Get_Size(dump));

    for (size_t i = 0, max = Vec_Get_Size(dump); i < max; i++) {
        Obj *elem_dump = Vec_Fetch(dump, i);
        if (elem_dump) {
            Vec_Store(loaded, i, Freezer_load(elem_dump));
        }
    }

    return (Obj*)loaded;
}
Example #8
0
static void
S_round_trip_integer(TestBatchRunner *runner, int64_t value) {
    Integer *num = Int_new(value);
    Vector *array = Vec_new(1);
    Vec_Store(array, 0, (Obj*)num);
    String *json = Json_to_json((Obj*)array);
    Obj *dump = Json_from_json(json);
    TEST_TRUE(runner, Vec_Equals(array, dump), "Round trip integer %ld",
              (long)value);
    DECREF(dump);
    DECREF(json);
    DECREF(array);
}
Example #9
0
Vector*
Freezer_deserialize_varray(Vector *array, InStream *instream) {
    uint32_t size = InStream_Read_C32(instream);
    Vec_init(array, size);
    for (uint32_t tick = InStream_Read_C32(instream);
         tick < size;
         tick += InStream_Read_C32(instream)
        ) {
        Obj *obj = THAW(instream);
        Vec_Store(array, tick, obj);
    }
    Vec_Resize(array, size);
    return array;
}
Example #10
0
static void
S_round_trip_float(TestBatchRunner *runner, double value, double max_diff) {
    Float *num = Float_new(value);
    Vector *array = Vec_new(1);
    Vec_Store(array, 0, (Obj*)num);
    String *json = Json_to_json((Obj*)array);
    Obj *dump = CERTIFY(Json_from_json(json), VECTOR);
    Float *got = (Float*)CERTIFY(Vec_Fetch((Vector*)dump, 0), FLOAT);
    double diff = Float_Get_Value(num) - Float_Get_Value(got);
    if (diff < 0) { diff = 0 - diff; }
    TEST_TRUE(runner, diff <= max_diff, "Round trip float %f", value);
    DECREF(dump);
    DECREF(json);
    DECREF(array);
}
Example #11
0
static void
S_do_prune(QueryParser *self, Query *query) {
    if (Query_is_a(query, NOTQUERY)) {
        // Don't allow double negatives.
        NOTQuery *not_query = (NOTQuery*)query;
        Query *neg_query = NOTQuery_Get_Negated_Query(not_query);
        if (!Query_is_a(neg_query, MATCHALLQUERY)
            && !S_has_valid_clauses(neg_query)
           ) {
            MatchAllQuery *matchall = MatchAllQuery_new();
            NOTQuery_Set_Negated_Query(not_query, (Query*)matchall);
            DECREF(matchall);
        }
    }
    else if (Query_is_a(query, POLYQUERY)) {
        PolyQuery *polyquery = (PolyQuery*)query;
        Vector    *children  = PolyQuery_Get_Children(polyquery);

        // Recurse.
        for (uint32_t i = 0, max = Vec_Get_Size(children); i < max; i++) {
            Query *child = (Query*)Vec_Fetch(children, i);
            S_do_prune(self, child);
        }

        if (PolyQuery_is_a(polyquery, REQUIREDOPTIONALQUERY)
            || PolyQuery_is_a(polyquery, ORQUERY)
           ) {
            // Don't allow 'foo OR (-bar)'.
            Vector *children = PolyQuery_Get_Children(polyquery);
            for (uint32_t i = 0, max = Vec_Get_Size(children); i < max; i++) {
                Query *child = (Query*)Vec_Fetch(children, i);
                if (!S_has_valid_clauses(child)) {
                    Vec_Store(children, i, (Obj*)NoMatchQuery_new());
                }
            }
        }
        else if (PolyQuery_is_a(polyquery, ANDQUERY)) {
            // Don't allow '(-bar AND -baz)'.
            if (!S_has_valid_clauses((Query*)polyquery)) {
                Vector *children = PolyQuery_Get_Children(polyquery);
                Vec_Clear(children);
            }
        }
    }
}
Example #12
0
static SortFieldWriter*
S_lazy_init_field_writer(SortWriter *self, int32_t field_num) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);

    SortFieldWriter *field_writer
        = (SortFieldWriter*)Vec_Fetch(ivars->field_writers, (size_t)field_num);
    if (!field_writer) {

        // Open temp files.
        if (!ivars->temp_ord_out) {
            Folder *folder   = ivars->folder;
            String *seg_name = Seg_Get_Name(ivars->segment);
            String *ord_path = Str_newf("%o/sort_ord_temp", seg_name);
            ivars->temp_ord_out = Folder_Open_Out(folder, ord_path);
            DECREF(ord_path);
            if (!ivars->temp_ord_out) {
                RETHROW(INCREF(Err_get_error()));
            }
            String *ix_path = Str_newf("%o/sort_ix_temp", seg_name);
            ivars->temp_ix_out = Folder_Open_Out(folder, ix_path);
            DECREF(ix_path);
            if (!ivars->temp_ix_out) {
                RETHROW(INCREF(Err_get_error()));
            }
            String *dat_path = Str_newf("%o/sort_dat_temp", seg_name);
            ivars->temp_dat_out = Folder_Open_Out(folder, dat_path);
            DECREF(dat_path);
            if (!ivars->temp_dat_out) {
                RETHROW(INCREF(Err_get_error()));
            }
        }

        String *field = Seg_Field_Name(ivars->segment, field_num);
        field_writer
            = SortFieldWriter_new(ivars->schema, ivars->snapshot, ivars->segment,
                                  ivars->polyreader, field, ivars->counter,
                                  ivars->mem_thresh, ivars->temp_ord_out,
                                  ivars->temp_ix_out, ivars->temp_dat_out);
        Vec_Store(ivars->field_writers, (size_t)field_num, (Obj*)field_writer);
    }
    return field_writer;
}
Example #13
0
DefaultDeletionsWriter*
DefDelWriter_init(DefaultDeletionsWriter *self, Schema *schema,
                  Snapshot *snapshot, Segment *segment,
                  PolyReader *polyreader) {

    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    ivars->seg_readers          = PolyReader_Seg_Readers(polyreader);
    size_t num_seg_readers      = Vec_Get_Size(ivars->seg_readers);
    ivars->seg_starts           = PolyReader_Offsets(polyreader);
    ivars->bit_vecs             = Vec_new(num_seg_readers);
    ivars->updated              = (bool*)CALLOCATE(num_seg_readers, sizeof(bool));
    ivars->searcher             = IxSearcher_new((Obj*)polyreader);
    ivars->name_to_tick         = Hash_new(num_seg_readers);

    // Materialize a BitVector of deletions for each segment.
    for (size_t i = 0; i < num_seg_readers; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(ivars->seg_readers, i);
        BitVector *bit_vec    = BitVec_new((size_t)SegReader_Doc_Max(seg_reader));
        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Fetch(
                  seg_reader, Class_Get_Name(DELETIONSREADER));
        Matcher *seg_dels = del_reader
                            ? DelReader_Iterator(del_reader)
                            : NULL;

        if (seg_dels) {
            int32_t del;
            while (0 != (del = Matcher_Next(seg_dels))) {
                BitVec_Set(bit_vec, (size_t)del);
            }
            DECREF(seg_dels);
        }
        Vec_Store(ivars->bit_vecs, i, (Obj*)bit_vec);
        Hash_Store(ivars->name_to_tick,
                   SegReader_Get_Seg_Name(seg_reader),
                   (Obj*)Int_new((int64_t)i));
    }

    return self;
}
Example #14
0
DefaultLexiconReader*
DefLexReader_init(DefaultLexiconReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, Vector *segments, int32_t seg_tick) {

    // Init.
    LexReader_init((LexiconReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    DefaultLexiconReaderIVARS *const ivars = DefLexReader_IVARS(self);
    Segment *segment = DefLexReader_Get_Segment(self);

    // Build an array of SegLexicon objects.
    ivars->lexicons = Vec_new(Schema_Num_Fields(schema));
    for (uint32_t i = 1, max = Schema_Num_Fields(schema) + 1; i < max; i++) {
        String *field = Seg_Field_Name(segment, (int32_t)i);
        if (field && S_has_data(schema, folder, segment, field)) {
            SegLexicon *lexicon = SegLex_new(schema, folder, segment, field);
            Vec_Store(ivars->lexicons, i, (Obj*)lexicon);
        }
    }

    return self;
}
Example #15
0
void
SortColl_Collect_IMP(SortCollector *self, int32_t doc_id) {
    SortCollectorIVARS *const ivars = SortColl_IVARS(self);

    // Add to the total number of hits.
    ivars->total_hits++;

    // Collect this hit if it's competitive.
    if (SI_competitive(ivars, doc_id)) {
        MatchDoc *const match_doc = ivars->bumped;
        MatchDocIVARS *const match_doc_ivars = MatchDoc_IVARS(match_doc);
        match_doc_ivars->doc_id = doc_id + ivars->base;

        if (ivars->need_score && match_doc_ivars->score == CHY_F32_NEGINF) {
            match_doc_ivars->score = Matcher_Score(ivars->matcher);
        }

        // Fetch values so that cross-segment sorting can work.
        if (ivars->need_values) {
            Vector *values = match_doc_ivars->values;

            for (uint32_t i = 0, max = ivars->num_rules; i < max; i++) {
                SortCache *cache   = ivars->sort_caches[i];
                Obj       *old_val = Vec_Delete(values, i);
                DECREF(old_val);
                if (cache) {
                    int32_t ord = SortCache_Ordinal(cache, doc_id);
                    Obj *val = SortCache_Value(cache, ord);
                    if (val) { Vec_Store(values, i, (Obj*)val); }
                }
            }
        }

        // Insert the new MatchDoc.
        ivars->bumped = (MatchDoc*)HitQ_Jostle(ivars->hit_q, (Obj*)match_doc);

        if (ivars->bumped) {
            if (ivars->bumped == match_doc) {
                /* The queue is full, and we have established a threshold for
                 * this segment as to what sort of document is definitely not
                 * acceptable.  Turn off AUTO_ACCEPT and start actually
                 * testing whether hits are competitive. */
                ivars->bubble_score  = match_doc_ivars->score;
                ivars->bubble_doc    = doc_id;
                ivars->actions       = ivars->derived_actions;
            }

            // Recycle.
            MatchDoc_IVARS(ivars->bumped)->score = ivars->need_score
                                                   ? CHY_F32_NEGINF
                                                   : CHY_F32_NAN;
        }
        else {
            // The queue isn't full yet, so create a fresh MatchDoc.
            Vector *values = ivars->need_values
                             ? Vec_new(ivars->num_rules)
                             : NULL;
            float fake_score = ivars->need_score
                               ? CHY_F32_NEGINF
                               : CHY_F32_NAN;
            ivars->bumped = MatchDoc_new(INT32_MAX, fake_score, values);
            DECREF(values);
        }

    }
}