static void
S_compose_inner_queries(QueryParser *self, Vector *elems,
                        String *default_field) {
    // Convert each TOKEN_STRING element into a TOKEN_QUERY wrapping a
    // LeafQuery, applying a field specifier when one immediately precedes
    // the string token; otherwise `default_field` applies.
    const int32_t default_occur = QParser_IVARS(self)->default_occur;

    // Generate all queries.  Apply any fields.  Iterate in reverse so the
    // field token at i - 1 is examined before slot i is overwritten.
    for (uint32_t i = Vec_Get_Size(elems); i--;) {
        String *field = default_field;
        ParserElem *elem = (ParserElem*)Vec_Fetch(elems, i);

        // Apply field.
        if (i > 0) {
            // Field specifier must immediately precede any query.
            ParserElem* maybe_field_elem
                = (ParserElem*)Vec_Fetch(elems, i - 1);
            if (ParserElem_Get_Type(maybe_field_elem) == TOKEN_FIELD) {
                field = (String*)ParserElem_As(maybe_field_elem, STRING);
            }
        }

        if (ParserElem_Get_Type(elem) == TOKEN_STRING) {
            String *text = (String*)ParserElem_As(elem, STRING);
            LeafQuery *query = LeafQuery_new(field, text);
            ParserElem *new_elem = ParserElem_new(TOKEN_QUERY, (Obj*)query);
            if (default_occur == MUST) {
                ParserElem_Require(new_elem);
            }
            // Vec_Store takes ownership of new_elem and discards the old
            // element at slot i.
            Vec_Store(elems, i, (Obj*)new_elem);
        }
    }
}
static InverterEntry*
S_fetch_entry(InverterIVARS *ivars, String *field) {
    // Look up (or lazily create) the pooled InverterEntry for `field`.
    // Throws if the field is unknown to both the Segment and the Schema.
    Schema *const schema = ivars->schema;
    int32_t num = Seg_Field_Num(ivars->segment, field);

    if (num == 0) {
        // The field isn't in the segment yet.  If the Schema knows it,
        // register it with the Segment to obtain a field num; otherwise
        // the caller supplied a field that was never spec'd.
        if (!Schema_Fetch_Type(schema, field)) {
            THROW(ERR, "Unknown field name: '%o'", field);
        }
        num = Seg_Add_Field(ivars->segment, field);
    }

    InverterEntry *cached
        = (InverterEntry*)Vec_Fetch(ivars->entry_pool, num);
    if (cached) { return cached; }

    // First use of this field: create an entry and cache it in the pool,
    // which takes ownership of the new reference.
    cached = InvEntry_new(schema, (String*)field, num);
    Vec_Store(ivars->entry_pool, num, (Obj*)cached);
    return cached;
}
Vector*
HeatMap_Flatten_Spans_IMP(HeatMap *self, Vector *spans) {
    const size_t num_spans = Vec_Get_Size(spans);
    UNUSED_VAR(self);

    if (!num_spans) {
        // No source spans: nothing to flatten.
        return Vec_new(0);
    }
    else {
        // Start with zero-weight spans covering each boundary-to-boundary
        // interval derived from the source spans.
        Vector *flattened = S_flattened_but_empty_spans(spans);
        const size_t num_raw_flattened = Vec_Get_Size(flattened);

        // Iterate over each of the source spans, contributing their scores to
        // any destination span that falls within range.
        size_t dest_tick = 0;
        for (size_t i = 0; i < num_spans; i++) {
            Span *source_span = (Span*)Vec_Fetch(spans, i);
            int32_t source_span_offset = Span_Get_Offset(source_span);
            int32_t source_span_len = Span_Get_Length(source_span);
            int32_t source_span_end = source_span_offset + source_span_len;

            // Get the location of the flattened span that shares the source
            // span's offset.  dest_tick is never reset, which assumes the
            // source spans are sorted by offset -- TODO confirm against
            // the caller's contract.
            for (; dest_tick < num_raw_flattened; dest_tick++) {
                Span *dest_span = (Span*)Vec_Fetch(flattened, dest_tick);
                if (Span_Get_Offset(dest_span) == source_span_offset) {
                    break;
                }
            }

            // Fill in scores for every flattened span inside the source
            // span's range, stopping at the span that begins at its end.
            for (size_t j = dest_tick; j < num_raw_flattened; j++) {
                Span *dest_span = (Span*)Vec_Fetch(flattened, j);
                if (Span_Get_Offset(dest_span) == source_span_end) {
                    break;
                }
                else {
                    float new_weight = Span_Get_Weight(dest_span)
                                       + Span_Get_Weight(source_span);
                    Span_Set_Weight(dest_span, new_weight);
                }
            }
        }

        // Leave holes instead of spans that don't have any score: compact
        // the scored spans to the front, then chop off the leftover tail.
        dest_tick = 0;
        for (size_t i = 0; i < num_raw_flattened; i++) {
            Span *span = (Span*)Vec_Fetch(flattened, i);
            if (Span_Get_Weight(span)) {
                // INCREF before Vec_Store: when i == dest_tick the store
                // would otherwise release the very span being kept.
                Vec_Store(flattened, dest_tick++, INCREF(span));
            }
        }
        Vec_Excise(flattened, dest_tick, num_raw_flattened - dest_tick);

        return flattened;
    }
}
static void
S_parse_subqueries(QueryParser *self, Vector *elems) {
    // Repeatedly collapse the leftmost innermost paren group in `elems`
    // into a single TOKEN_QUERY element until no parens remain.
    const int32_t default_occur = QParser_IVARS(self)->default_occur;
    while (1) {
        // Work from the inside out, starting with the leftmost innermost
        // paren group.
        size_t left = SIZE_MAX;
        size_t right = SIZE_MAX;
        String *field = NULL;
        for (size_t i = 0, max = Vec_Get_Size(elems); i < max; i++) {
            ParserElem *elem = (ParserElem*)Vec_Fetch(elems, i);
            uint32_t type = ParserElem_Get_Type(elem);
            if (type == TOKEN_OPEN_PAREN) {
                // Keep the most recent open paren before the first close:
                // that pair is the innermost group.
                left = i;
            }
            else if (type == TOKEN_CLOSE_PAREN) {
                right = i;
                break;
            }
            else if (type == TOKEN_FIELD && i < max - 1) {
                // If a field applies to an enclosing paren, pass it along.
                ParserElem *next_elem = (ParserElem*)Vec_Fetch(elems, i + 1);
                uint32_t next_type = ParserElem_Get_Type(next_elem);
                if (next_type == TOKEN_OPEN_PAREN) {
                    field = (String*)ParserElem_As(elem, STRING);
                }
            }
        }

        // Break out of loop when there are no parens left.
        if (right == SIZE_MAX) {
            break;
        }

        // Create the subquery from the elements strictly inside the parens.
        Vector *sub_elems = Vec_Slice(elems, left + 1, right - left - 1);
        Query *subquery = S_parse_subquery(self, sub_elems, field, true);
        ParserElem *new_elem = ParserElem_new(TOKEN_QUERY, (Obj*)subquery);
        if (default_occur == MUST) {
            ParserElem_Require(new_elem);
        }
        DECREF(sub_elems);

        // Replace the elements used to create the subquery with the subquery
        // itself.  A field token immediately preceding the open paren is
        // consumed as well.
        if (left > 0) {
            ParserElem *maybe_field = (ParserElem*)Vec_Fetch(elems, left - 1);
            uint32_t maybe_field_type = ParserElem_Get_Type(maybe_field);
            if (maybe_field_type == TOKEN_FIELD) {
                left -= 1;
            }
        }
        // Remove everything after slot `left` through the close paren,
        // then overwrite slot `left` with the new element (Vec_Store takes
        // ownership of new_elem).
        Vec_Excise(elems, left + 1, right - left);
        Vec_Store(elems, left, (Obj*)new_elem);
    }
}
Vector*
IxManager_Recycle_IMP(IndexManager *self, PolyReader *reader,
                      DeletionsWriter *del_writer, int64_t cutoff,
                      bool optimize) {
    // Select SegReaders whose segments should be merged away ("recycled").
    // Only segments with a seg num above `cutoff` are candidates.  Returns
    // a new Vector of SegReaders; the caller owns the returned reference.
    Vector *seg_readers = PolyReader_Get_Seg_Readers(reader);
    size_t num_seg_readers = Vec_Get_Size(seg_readers);
    SegReader **candidates
        = (SegReader**)MALLOCATE(num_seg_readers * sizeof(SegReader*));
    size_t num_candidates = 0;
    for (size_t i = 0; i < num_seg_readers; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(seg_readers, i);
        if (SegReader_Get_Seg_Num(seg_reader) > cutoff) {
            candidates[num_candidates++] = seg_reader;
        }
    }

    Vector *recyclables = Vec_new(num_candidates);
    if (optimize) {
        // Optimizing: recycle every eligible segment.
        for (size_t i = 0; i < num_candidates; i++) {
            Vec_Push(recyclables, INCREF(candidates[i]));
        }
        FREEMEM(candidates);
        return recyclables;
    }

    // Sort by ascending size in docs, choose sparsely populated segments.
    qsort(candidates, num_candidates, sizeof(SegReader*),
          S_compare_doc_count);
    int32_t *counts = (int32_t*)MALLOCATE(num_candidates * sizeof(int32_t));
    for (uint32_t i = 0; i < num_candidates; i++) {
        counts[i] = SegReader_Doc_Count(candidates[i]);
    }
    // I32Arr_new_steal takes ownership of `counts`; no FREEMEM needed.
    I32Array *doc_counts = I32Arr_new_steal(counts, num_candidates);
    uint32_t threshold = IxManager_Choose_Sparse(self, doc_counts);
    DECREF(doc_counts);

    // Move SegReaders to be recycled.
    for (uint32_t i = 0; i < threshold; i++) {
        Vec_Store(recyclables, i, INCREF(candidates[i]));
    }

    // Find segments where at least 10% of all docs have been deleted.
    for (uint32_t i = threshold; i < num_candidates; i++) {
        SegReader *seg_reader = candidates[i];
        String *seg_name = SegReader_Get_Seg_Name(seg_reader);
        double doc_max = SegReader_Doc_Max(seg_reader);
        double num_deletions = DelWriter_Seg_Del_Count(del_writer, seg_name);
        double del_proportion = num_deletions / doc_max;
        if (del_proportion >= 0.1) {
            Vec_Push(recyclables, INCREF(seg_reader));
        }
    }

    FREEMEM(candidates);
    return recyclables;
}
static Obj*
S_dump_array(Vector *array) {
    // Produce a parallel Vector whose slots hold dumped versions of the
    // source elements; NULL slots remain holes in the result.
    const size_t count = Vec_Get_Size(array);
    Vector *result = Vec_new(count);
    for (size_t tick = 0; tick < count; tick++) {
        Obj *item = Vec_Fetch(array, tick);
        if (item != NULL) {
            Vec_Store(result, tick, Freezer_dump(item));
        }
    }
    return (Obj*)result;
}
Obj*
S_load_from_array(Vector *dump) {
    // Rebuild a Vector by loading each dumped element back into a live
    // object; NULL slots remain holes in the result.
    const size_t count = Vec_Get_Size(dump);
    Vector *restored = Vec_new(count);
    for (size_t tick = 0; tick < count; tick++) {
        Obj *frozen = Vec_Fetch(dump, tick);
        if (frozen != NULL) {
            Vec_Store(restored, tick, Freezer_load(frozen));
        }
    }
    return (Obj*)restored;
}
static void
S_round_trip_integer(TestBatchRunner *runner, int64_t value) {
    // Round-trip a single Integer through JSON text and check that the
    // decoded structure equals the original.
    Integer *boxed = Int_new(value);
    Vector *wrapper = Vec_new(1);
    Vec_Store(wrapper, 0, (Obj*)boxed);  // wrapper takes ownership of boxed.

    String *serialized = Json_to_json((Obj*)wrapper);
    Obj *thawed = Json_from_json(serialized);
    TEST_TRUE(runner, Vec_Equals(wrapper, thawed),
              "Round trip integer %ld", (long)value);

    DECREF(wrapper);
    DECREF(serialized);
    DECREF(thawed);
}
Vector*
Freezer_deserialize_varray(Vector *array, InStream *instream) {
    // Reconstitute a Vector from `instream`.  Wire format as implied by
    // this loop: a C32 slot count, then alternating C32 indices and frozen
    // objects -- the first C32 is the index of the first non-NULL element
    // and each later C32 advances the index past any NULL holes.
    // NOTE(review): gap encoding inferred from the loop structure; confirm
    // against the matching serialize routine.
    uint32_t size = InStream_Read_C32(instream);
    Vec_init(array, size);
    for (uint32_t tick = InStream_Read_C32(instream);
         tick < size;
         tick += InStream_Read_C32(instream)
        ) {
        Obj *obj = THAW(instream);
        Vec_Store(array, tick, obj);
    }
    // Grow to the full slot count so trailing holes are represented.
    Vec_Resize(array, size);
    return array;
}
static void
S_round_trip_float(TestBatchRunner *runner, double value, double max_diff) {
    // Round-trip a Float through JSON text and verify the decoded value is
    // within max_diff of the original (text conversion may lose precision).
    Float *boxed = Float_new(value);
    Vector *wrapper = Vec_new(1);
    Vec_Store(wrapper, 0, (Obj*)boxed);  // wrapper takes ownership of boxed.

    String *serialized = Json_to_json((Obj*)wrapper);
    Obj *thawed = CERTIFY(Json_from_json(serialized), VECTOR);
    Float *decoded
        = (Float*)CERTIFY(Vec_Fetch((Vector*)thawed, 0), FLOAT);

    // Manual absolute difference; avoids relying on math.h being included.
    double delta = Float_Get_Value(boxed) - Float_Get_Value(decoded);
    if (delta < 0) { delta = 0 - delta; }
    TEST_TRUE(runner, delta <= max_diff, "Round trip float %f", value);

    DECREF(wrapper);
    DECREF(serialized);
    DECREF(thawed);
}
static void
S_do_prune(QueryParser *self, Query *query) {
    // Recursively rewrite the query tree, neutralizing clause combinations
    // that would be invalid or meaningless (pure-negative subqueries).
    if (Query_is_a(query, NOTQUERY)) {
        // Don't allow double negatives.
        NOTQuery *not_query = (NOTQuery*)query;
        Query *neg_query = NOTQuery_Get_Negated_Query(not_query);
        if (!Query_is_a(neg_query, MATCHALLQUERY)
            && !S_has_valid_clauses(neg_query)
           ) {
            // Negating something that matches nothing: substitute MatchAll
            // so the NOT excludes everything instead.
            MatchAllQuery *matchall = MatchAllQuery_new();
            NOTQuery_Set_Negated_Query(not_query, (Query*)matchall);
            DECREF(matchall);
        }
    }
    else if (Query_is_a(query, POLYQUERY)) {
        PolyQuery *polyquery = (PolyQuery*)query;
        Vector *children = PolyQuery_Get_Children(polyquery);

        // Recurse into children first.
        for (uint32_t i = 0, max = Vec_Get_Size(children); i < max; i++) {
            Query *child = (Query*)Vec_Fetch(children, i);
            S_do_prune(self, child);
        }

        if (PolyQuery_is_a(polyquery, REQUIREDOPTIONALQUERY)
            || PolyQuery_is_a(polyquery, ORQUERY)
           ) {
            // Don't allow 'foo OR (-bar)'.
            Vector *children = PolyQuery_Get_Children(polyquery);
            for (uint32_t i = 0, max = Vec_Get_Size(children); i < max; i++) {
                Query *child = (Query*)Vec_Fetch(children, i);
                if (!S_has_valid_clauses(child)) {
                    // Vec_Store discards the old child and takes ownership
                    // of the new NoMatchQuery.
                    Vec_Store(children, i, (Obj*)NoMatchQuery_new());
                }
            }
        }
        else if (PolyQuery_is_a(polyquery, ANDQUERY)) {
            // Don't allow '(-bar AND -baz)'.
            if (!S_has_valid_clauses((Query*)polyquery)) {
                Vector *children = PolyQuery_Get_Children(polyquery);
                Vec_Clear(children);
            }
        }
    }
}
static SortFieldWriter*
S_lazy_init_field_writer(SortWriter *self, int32_t field_num) {
    // Return the cached SortFieldWriter for `field_num`, creating it (and
    // the shared temp output streams) on first use.
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    SortFieldWriter *field_writer
        = (SortFieldWriter*)Vec_Fetch(ivars->field_writers,
                                      (size_t)field_num);
    if (!field_writer) {
        // Open temp files.  The three streams are shared by every field
        // writer in this segment, so only open them once (temp_ord_out
        // doubles as the "already opened" flag).
        if (!ivars->temp_ord_out) {
            Folder *folder = ivars->folder;
            String *seg_name = Seg_Get_Name(ivars->segment);
            String *ord_path = Str_newf("%o/sort_ord_temp", seg_name);
            ivars->temp_ord_out = Folder_Open_Out(folder, ord_path);
            DECREF(ord_path);
            if (!ivars->temp_ord_out) {
                RETHROW(INCREF(Err_get_error()));
            }
            String *ix_path = Str_newf("%o/sort_ix_temp", seg_name);
            ivars->temp_ix_out = Folder_Open_Out(folder, ix_path);
            DECREF(ix_path);
            if (!ivars->temp_ix_out) {
                RETHROW(INCREF(Err_get_error()));
            }
            String *dat_path = Str_newf("%o/sort_dat_temp", seg_name);
            ivars->temp_dat_out = Folder_Open_Out(folder, dat_path);
            DECREF(dat_path);
            if (!ivars->temp_dat_out) {
                RETHROW(INCREF(Err_get_error()));
            }
        }

        String *field = Seg_Field_Name(ivars->segment, field_num);
        field_writer
            = SortFieldWriter_new(ivars->schema, ivars->snapshot,
                                  ivars->segment, ivars->polyreader, field,
                                  ivars->counter, ivars->mem_thresh,
                                  ivars->temp_ord_out, ivars->temp_ix_out,
                                  ivars->temp_dat_out);
        // Cache it; the pool takes ownership of the new reference.
        Vec_Store(ivars->field_writers, (size_t)field_num,
                  (Obj*)field_writer);
    }
    return field_writer;
}
DefaultDeletionsWriter*
DefDelWriter_init(DefaultDeletionsWriter *self, Schema *schema,
                  Snapshot *snapshot, Segment *segment,
                  PolyReader *polyreader) {
    // Initialize a deletions writer, pre-loading existing deletions for
    // every segment into in-memory BitVectors.
    DataWriter_init((DataWriter*)self, schema, snapshot, segment,
                    polyreader);
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    ivars->seg_readers = PolyReader_Seg_Readers(polyreader);
    size_t num_seg_readers = Vec_Get_Size(ivars->seg_readers);
    ivars->seg_starts = PolyReader_Offsets(polyreader);
    ivars->bit_vecs = Vec_new(num_seg_readers);
    // updated[i] tracks whether segment i's deletions changed and need to
    // be written out.
    ivars->updated = (bool*)CALLOCATE(num_seg_readers, sizeof(bool));
    ivars->searcher = IxSearcher_new((Obj*)polyreader);
    ivars->name_to_tick = Hash_new(num_seg_readers);

    // Materialize a BitVector of deletions for each segment.
    for (size_t i = 0; i < num_seg_readers; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(ivars->seg_readers, i);
        BitVector *bit_vec
            = BitVec_new((size_t)SegReader_Doc_Max(seg_reader));
        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Fetch(
                  seg_reader, Class_Get_Name(DELETIONSREADER));
        // A segment may have no deletions component at all.
        Matcher *seg_dels
            = del_reader ? DelReader_Iterator(del_reader) : NULL;
        if (seg_dels) {
            int32_t del;
            // Matcher_Next returns 0 when the iterator is exhausted.
            while (0 != (del = Matcher_Next(seg_dels))) {
                BitVec_Set(bit_vec, (size_t)del);
            }
            DECREF(seg_dels);
        }
        Vec_Store(ivars->bit_vecs, i, (Obj*)bit_vec);
        // Map segment name -> tick so later deletions can find the right
        // BitVector.
        Hash_Store(ivars->name_to_tick,
                   SegReader_Get_Seg_Name(seg_reader),
                   (Obj*)Int_new((int64_t)i));
    }

    return self;
}
DefaultLexiconReader*
DefLexReader_init(DefaultLexiconReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, Vector *segments, int32_t seg_tick) {
    // Superclass init.
    LexReader_init((LexiconReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    DefaultLexiconReaderIVARS *const ivars = DefLexReader_IVARS(self);
    Segment *segment = DefLexReader_Get_Segment(self);

    // Build an array of SegLexicon objects, one per field that actually
    // has data on disk.  Field numbers start at 1, so slot 0 stays empty.
    uint32_t num_fields = Schema_Num_Fields(schema);
    ivars->lexicons = Vec_new(num_fields);
    for (uint32_t field_num = 1; field_num <= num_fields; field_num++) {
        String *field = Seg_Field_Name(segment, (int32_t)field_num);
        if (field == NULL) { continue; }
        if (!S_has_data(schema, folder, segment, field)) { continue; }
        SegLexicon *lexicon = SegLex_new(schema, folder, segment, field);
        Vec_Store(ivars->lexicons, field_num, (Obj*)lexicon);
    }

    return self;
}
void
SortColl_Collect_IMP(SortCollector *self, int32_t doc_id) {
    // Consider one hit for the sorted hit queue, recycling a single
    // "bumped" MatchDoc between calls to avoid per-hit allocation.
    SortCollectorIVARS *const ivars = SortColl_IVARS(self);

    // Add to the total number of hits.
    ivars->total_hits++;

    // Collect this hit if it's competitive.
    if (SI_competitive(ivars, doc_id)) {
        // Reuse the previously bumped MatchDoc rather than allocating.
        MatchDoc *const match_doc = ivars->bumped;
        MatchDocIVARS *const match_doc_ivars = MatchDoc_IVARS(match_doc);
        match_doc_ivars->doc_id = doc_id + ivars->base;

        // CHY_F32_NEGINF marks the score slot as "not yet computed".
        if (ivars->need_score && match_doc_ivars->score == CHY_F32_NEGINF) {
            match_doc_ivars->score = Matcher_Score(ivars->matcher);
        }

        // Fetch values so that cross-segment sorting can work.
        if (ivars->need_values) {
            Vector *values = match_doc_ivars->values;

            for (uint32_t i = 0, max = ivars->num_rules; i < max; i++) {
                SortCache *cache = ivars->sort_caches[i];
                // Discard any stale value left from the recycled MatchDoc.
                Obj *old_val = Vec_Delete(values, i);
                DECREF(old_val);
                if (cache) {
                    int32_t ord = SortCache_Ordinal(cache, doc_id);
                    Obj *val = SortCache_Value(cache, ord);
                    if (val) { Vec_Store(values, i, (Obj*)val); }
                }
            }
        }

        // Insert the new MatchDoc; Jostle returns a displaced doc (or NULL
        // if the queue still has room).
        ivars->bumped
            = (MatchDoc*)HitQ_Jostle(ivars->hit_q, (Obj*)match_doc);

        if (ivars->bumped) {
            if (ivars->bumped == match_doc) {
                /* The queue is full, and we have established a threshold for
                 * this segment as to what sort of document is definitely not
                 * acceptable.  Turn off AUTO_ACCEPT and start actually
                 * testing whether hits are competitive. */
                ivars->bubble_score = match_doc_ivars->score;
                ivars->bubble_doc = doc_id;
                ivars->actions = ivars->derived_actions;
            }

            // Recycle: reset the displaced doc's score sentinel.
            MatchDoc_IVARS(ivars->bumped)->score
                = ivars->need_score ? CHY_F32_NEGINF : CHY_F32_NAN;
        }
        else {
            // The queue isn't full yet, so create a fresh MatchDoc.
            Vector *values
                = ivars->need_values ? Vec_new(ivars->num_rules) : NULL;
            float fake_score
                = ivars->need_score ? CHY_F32_NEGINF : CHY_F32_NAN;
            ivars->bumped = MatchDoc_new(INT32_MAX, fake_score, values);
            DECREF(values);
        }
    }
}