// Return the InverterEntry for `field`, resolving its field number and
// lazily creating/caching the entry in the pool on first use.
static InverterEntry*
S_fetch_entry(InverterIVARS *ivars, CharBuf *field) {
    Schema *const schema = ivars->schema;
    int32_t field_num = Seg_Field_Num(ivars->segment, field);

    if (field_num == 0) {
        // The segment doesn't know this field yet.  Consult the Schema.
        if (Schema_Fetch_Type(schema, field) != NULL) {
            // Known to the Schema -- have the Segment assign it a number.
            field_num = Seg_Add_Field(ivars->segment, field);
        }
        else {
            // Not defined anywhere; the user never spec'd this field.
            THROW(ERR, "Unknown field name: '%o'", field);
        }
    }

    // Create and cache an entry for this field number if none exists.
    InverterEntry *entry
        = (InverterEntry*)VA_Fetch(ivars->entry_pool, field_num);
    if (entry == NULL) {
        entry = InvEntry_new(schema, (CharBuf*)field, field_num);
        VA_Store(ivars->entry_pool, field_num, (Obj*)entry);
    }
    return entry;
}
// Collapse overlapping source spans into a flat, non-overlapping sequence,
// summing the weights of all source spans that cover each region.
VArray*
HeatMap_Flatten_Spans_IMP(HeatMap *self, VArray *spans) {
    const uint32_t num_spans = VA_Get_Size(spans);
    UNUSED_VAR(self);
    if (!num_spans) {
        // No input: return an empty array rather than NULL.
        return VA_new(0);
    }
    else {
        // Start from zero-weight spans whose boundaries cover every
        // start/end point found in the source spans.
        VArray *flattened = S_flattened_but_empty_spans(spans);
        const uint32_t num_raw_flattened = VA_Get_Size(flattened);

        // Iterate over each of the source spans, contributing their scores to
        // any destination span that falls within range.
        uint32_t dest_tick = 0;
        for (uint32_t i = 0; i < num_spans; i++) {
            Span *source_span = (Span*)VA_Fetch(spans, i);
            int32_t source_span_offset = Span_Get_Offset(source_span);
            int32_t source_span_len = Span_Get_Length(source_span);
            int32_t source_span_end = source_span_offset + source_span_len;

            // Get the location of the flattened span that shares the source
            // span's offset.  (dest_tick is not reset between source spans;
            // presumably the inputs are sorted by offset -- TODO confirm.)
            for (; dest_tick < num_raw_flattened; dest_tick++) {
                Span *dest_span = (Span*)VA_Fetch(flattened, dest_tick);
                if (Span_Get_Offset(dest_span) == source_span_offset) {
                    break;
                }
            }

            // Fill in scores.
            for (uint32_t j = dest_tick; j < num_raw_flattened; j++) {
                Span *dest_span = (Span*)VA_Fetch(flattened, j);
                if (Span_Get_Offset(dest_span) == source_span_end) {
                    break;
                }
                else {
                    float new_weight = Span_Get_Weight(dest_span)
                                       + Span_Get_Weight(source_span);
                    Span_Set_Weight(dest_span, new_weight);
                }
            }
        }

        // Leave holes instead of spans that don't have any score.
        dest_tick = 0;
        for (uint32_t i = 0; i < num_raw_flattened; i++) {
            Span *span = (Span*)VA_Fetch(flattened, i);
            if (Span_Get_Weight(span)) {
                // Compact scored spans toward the front.  The INCREF balances
                // the reference released for the element the store displaces.
                VA_Store(flattened, dest_tick++, INCREF(span));
            }
        }
        // Trim the now-redundant tail past the last compacted span.
        VA_Excise(flattened, dest_tick, num_raw_flattened - dest_tick);

        return flattened;
    }
}
VArray*
VA_dump(VArray *self) {
    /* Produce a parallel array holding each element's dump; NULL slots
     * (holes) are preserved as holes. */
    VArray *dump = VA_new(self->size);
    u32_t tick, limit;

    for (tick = 0, limit = self->size; tick < limit; tick++) {
        Obj *elem = VA_Fetch(self, tick);
        if (elem != NULL) {
            VA_Store(dump, tick, Obj_Dump(elem));
        }
    }

    return dump;
}
// Stash the sub-readers, compute per-segment doc-id start offsets, and
// build one aggregated DataReader per component API across all segments.
static void
S_init_sub_readers(PolyReader *self, VArray *sub_readers) {
    PolyReaderIVARS *const ivars = PolyReader_IVARS(self);
    uint32_t num_sub_readers = VA_Get_Size(sub_readers);
    int32_t *starts = (int32_t*)MALLOCATE(num_sub_readers * sizeof(int32_t));
    Hash *data_readers = Hash_new(0);

    // Release any previous state before taking the new sub-readers.
    DECREF(ivars->sub_readers);
    DECREF(ivars->offsets);
    ivars->sub_readers = (VArray*)INCREF(sub_readers);

    // Accumulate doc_max, subreader start offsets, and DataReaders.
    ivars->doc_max = 0;
    for (uint32_t i = 0; i < num_sub_readers; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(sub_readers, i);
        Hash *components = SegReader_Get_Components(seg_reader);
        CharBuf *api;
        DataReader *component;
        starts[i] = ivars->doc_max;
        ivars->doc_max += SegReader_Doc_Max(seg_reader);
        Hash_Iterate(components);
        while (Hash_Next(components, (Obj**)&api, (Obj**)&component)) {
            // Group each segment's component under its API name, so that
            // readers[i] lines up with segment i.
            VArray *readers = (VArray*)Hash_Fetch(data_readers, (Obj*)api);
            if (!readers) {
                readers = VA_new(num_sub_readers);
                Hash_Store(data_readers, (Obj*)api, (Obj*)readers);
            }
            VA_Store(readers, i, INCREF(component));
        }
    }
    // I32Arr_new_steal takes ownership of the malloc'd starts buffer.
    ivars->offsets = I32Arr_new_steal(starts, num_sub_readers);

    // For each API, have a representative reader aggregate the per-segment
    // readers into a single reader covering the whole PolyReader.
    CharBuf *api;
    VArray *readers;
    Hash_Iterate(data_readers);
    while (Hash_Next(data_readers, (Obj**)&api, (Obj**)&readers)) {
        DataReader *datareader
            = (DataReader*)CERTIFY(S_first_non_null(readers), DATAREADER);
        DataReader *aggregator
            = DataReader_Aggregator(datareader, readers, ivars->offsets);
        if (aggregator) {
            CERTIFY(aggregator, DATAREADER);
            Hash_Store(ivars->components, (Obj*)api, (Obj*)aggregator);
        }
    }
    DECREF(data_readers);

    // Cache the total deletion count from the aggregated DeletionsReader,
    // if one was produced.
    DeletionsReader *del_reader = (DeletionsReader*)Hash_Fetch(
        ivars->components, (Obj*)VTable_Get_Name(DELETIONSREADER));
    ivars->del_count = del_reader ? DelReader_Del_Count(del_reader) : 0;
}
// Serialize a single Integer64 (wrapped in a VArray) to JSON, parse it
// back, and verify that the parsed structure equals the original.
static void
S_round_trip_integer(TestBatch *batch, int64_t value) {
    Integer64 *num = Int64_new(value);
    VArray *array = VA_new(1);
    VA_Store(array, 0, (Obj*)num);
    CharBuf *json = Json_to_json((Obj*)array);
    Obj *dump = Json_from_json(json);
    // Cast to long long with %lld rather than to long with %ld: on LLP64
    // platforms (e.g. 64-bit Windows) `long` is 32 bits, so the old cast
    // truncated large int64_t values in the test description.
    TEST_TRUE(batch, VA_Equals(array, dump), "Round trip integer %lld",
              (long long)value);
    DECREF(dump);
    DECREF(json);
    DECREF(array);
}
// Select segments which should be consolidated ("recycled") in the next
// merge: all candidates when optimizing, otherwise the sparsely populated
// ones plus any segment with at least 10% of its docs deleted.
VArray*
IxManager_recycle(IndexManager *self, PolyReader *reader,
                  DeletionsWriter *del_writer, int64_t cutoff,
                  bool_t optimize) {
    VArray *seg_readers = PolyReader_Get_Seg_Readers(reader);
    // Only segments which pass the cutoff test are candidates.
    VArray *candidates = VA_Gather(seg_readers, S_check_cutoff, &cutoff);
    VArray *recyclables = VA_new(VA_Get_Size(candidates));
    const uint32_t num_candidates = VA_Get_Size(candidates);

    if (optimize) {
        // Full optimize: recycle every candidate segment.
        DECREF(recyclables);
        return candidates;
    }

    // Sort by ascending size in docs, choose sparsely populated segments.
    VA_Sort(candidates, S_compare_doc_count, NULL);
    int32_t *counts = (int32_t*)MALLOCATE(num_candidates * sizeof(int32_t));
    for (uint32_t i = 0; i < num_candidates; i++) {
        SegReader *seg_reader = (SegReader*)CERTIFY(
                                    VA_Fetch(candidates, i), SEGREADER);
        counts[i] = SegReader_Doc_Count(seg_reader);
    }
    // I32Arr_new_steal takes ownership of the counts buffer.
    I32Array *doc_counts = I32Arr_new_steal(counts, num_candidates);
    uint32_t threshold = IxManager_Choose_Sparse(self, doc_counts);
    DECREF(doc_counts);

    // Move SegReaders to be recycled.
    for (uint32_t i = 0; i < threshold; i++) {
        VA_Store(recyclables, i, VA_Delete(candidates, i));
    }

    // Find segments where at least 10% of all docs have been deleted.
    for (uint32_t i = threshold; i < num_candidates; i++) {
        SegReader *seg_reader = (SegReader*)VA_Delete(candidates, i);
        CharBuf *seg_name = SegReader_Get_Seg_Name(seg_reader);
        double doc_max = SegReader_Doc_Max(seg_reader);
        double num_deletions = DelWriter_Seg_Del_Count(del_writer, seg_name);
        double del_proportion = num_deletions / doc_max;
        if (del_proportion >= 0.1) {
            VA_Push(recyclables, (Obj*)seg_reader);
        }
        else {
            // Not worth recycling; release the reference from VA_Delete.
            DECREF(seg_reader);
        }
    }

    DECREF(candidates);
    return recyclables;
}
// Serialize a Float64 (wrapped in a VArray) to JSON, parse it back, and
// check that the decoded value is within max_diff of the original.
static void
S_round_trip_float(TestBatch *batch, double value, double max_diff) {
    Float64 *num = Float64_new(value);
    VArray *array = VA_new(1);
    VA_Store(array, 0, (Obj*)num);

    CharBuf *json = Json_to_json((Obj*)array);
    Obj *dump = CERTIFY(Json_from_json(json), VARRAY);
    Float64 *got
        = (Float64*)CERTIFY(VA_Fetch((VArray*)dump, 0), FLOAT64);

    // Absolute difference between original and round-tripped value.
    double delta = Float64_Get_Value(num) - Float64_Get_Value(got);
    if (delta < 0) {
        delta = 0 - delta;
    }
    TEST_TRUE(batch, delta <= max_diff, "Round trip float %f", value);

    DECREF(dump);
    DECREF(json);
    DECREF(array);
}
VArray*
PriQ_Pop_All_IMP(PriorityQueue *self) {
    PriorityQueueIVARS *const ivars = PriQ_IVARS(self);
    VArray *retval = VA_new(ivars->size);

    // Successive pops yield elements in ascending order, so fill the
    // array back-to-front.  (The slot count is captured up front, since
    // each pop shrinks the queue.)
    uint32_t slot = ivars->size;
    while (slot > 0) {
        slot--;
        Obj *const popped = PriQ_Pop(self);
        VA_Store(retval, slot, popped);
    }

    return retval;
}
VArray*
VA_load(VArray *self, Obj *dump) {
    /* Rebuild a VArray from its dump, loading each element in turn.
     * NULL slots (holes) stay NULL. */
    VArray *source = (VArray*)ASSERT_IS_A(dump, VARRAY);
    VArray *loaded = VA_new(source->size);
    u32_t tick, limit;
    UNUSED_VAR(self);

    for (tick = 0, limit = source->size; tick < limit; tick++) {
        Obj *elem_dump = VA_Fetch(source, tick);
        if (elem_dump != NULL) {
            VA_Store(loaded, tick, Obj_Load(elem_dump, elem_dump));
        }
    }

    return loaded;
}
void
MemPool_eat(MemoryPool *self, MemoryPool *other) {
    i32_t i;
    if (self->buf != NULL) {
        THROW("Memory pool is not empty");
    }

    /* Take ownership of each active arena from other, displacing any
     * arena self already holds in that slot. */
    for (i = 0; i <= other->tick; i++) {
        ByteBuf *arena = (ByteBuf*)VA_Shift(other->arenas);
        VA_Store(self->arenas, i, (Obj*)arena);
    }

    /* Adopt other's bookkeeping state. */
    self->tick     = other->tick;
    self->last_buf = other->last_buf;
    self->buf      = other->buf;
    self->limit    = other->limit;
}
// Fetch the SortFieldWriter for `field_num`, creating and caching it on
// first use.  The temp output streams shared by all field writers are
// themselves opened lazily the first time any field writer is created.
static SortFieldWriter*
S_lazy_init_field_writer(SortWriter *self, int32_t field_num) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    SortFieldWriter *field_writer
        = (SortFieldWriter*)VA_Fetch(ivars->field_writers, field_num);
    if (!field_writer) {

        // Open temp files.
        if (!ivars->temp_ord_out) {
            Folder *folder = ivars->folder;
            CharBuf *seg_name = Seg_Get_Name(ivars->segment);
            CharBuf *path = CB_newf("%o/sort_ord_temp", seg_name);
            ivars->temp_ord_out = Folder_Open_Out(folder, path);
            if (!ivars->temp_ord_out) {
                // Propagate the Folder's error after releasing the path.
                DECREF(path);
                RETHROW(INCREF(Err_get_error()));
            }
            // Reuse the same CharBuf for each successive temp file path.
            CB_setf(path, "%o/sort_ix_temp", seg_name);
            ivars->temp_ix_out = Folder_Open_Out(folder, path);
            if (!ivars->temp_ix_out) {
                DECREF(path);
                RETHROW(INCREF(Err_get_error()));
            }
            CB_setf(path, "%o/sort_dat_temp", seg_name);
            ivars->temp_dat_out = Folder_Open_Out(folder, path);
            if (!ivars->temp_dat_out) {
                DECREF(path);
                RETHROW(INCREF(Err_get_error()));
            }
            DECREF(path);
        }

        CharBuf *field = Seg_Field_Name(ivars->segment, field_num);
        field_writer
            = SortFieldWriter_new(ivars->schema, ivars->snapshot,
                                  ivars->segment, ivars->polyreader, field,
                                  ivars->mem_pool, ivars->mem_thresh,
                                  ivars->temp_ord_out, ivars->temp_ix_out,
                                  ivars->temp_dat_out);
        // Cache for subsequent calls; the pool array owns the writer.
        VA_Store(ivars->field_writers, field_num, (Obj*)field_writer);
    }
    return field_writer;
}
// Initialize a DefaultDeletionsWriter, materializing a BitVector of
// existing deletions for every segment in the PolyReader.
DefaultDeletionsWriter*
DefDelWriter_init(DefaultDeletionsWriter *self, Schema *schema,
                  Snapshot *snapshot, Segment *segment,
                  PolyReader *polyreader) {
    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    ivars->seg_readers = PolyReader_Seg_Readers(polyreader);
    uint32_t num_seg_readers = VA_Get_Size(ivars->seg_readers);
    ivars->seg_starts = PolyReader_Offsets(polyreader);
    ivars->bit_vecs = VA_new(num_seg_readers);
    // One "updated" flag per segment, zeroed: no deletions recorded yet.
    ivars->updated = (bool*)CALLOCATE(num_seg_readers, sizeof(bool));
    ivars->searcher = IxSearcher_new((Obj*)polyreader);
    ivars->name_to_tick = Hash_new(num_seg_readers);

    // Materialize a BitVector of deletions for each segment.
    for (uint32_t i = 0; i < num_seg_readers; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(ivars->seg_readers, i);
        BitVector *bit_vec = BitVec_new(SegReader_Doc_Max(seg_reader));
        DeletionsReader *del_reader = (DeletionsReader*)SegReader_Fetch(
            seg_reader, Class_Get_Name(DELETIONSREADER));
        Matcher *seg_dels = del_reader
                            ? DelReader_Iterator(del_reader)
                            : NULL;
        if (seg_dels) {
            int32_t del;
            // Copy each deleted doc id into the BitVector.
            while (0 != (del = Matcher_Next(seg_dels))) {
                BitVec_Set(bit_vec, del);
            }
            DECREF(seg_dels);
        }
        VA_Store(ivars->bit_vecs, i, (Obj*)bit_vec);
        // Map the segment name to its tick for later lookups.
        Hash_Store(ivars->name_to_tick,
                   (Obj*)SegReader_Get_Seg_Name(seg_reader),
                   (Obj*)Int32_new(i));
    }

    return self;
}
DefaultLexiconReader*
DefLexReader_init(DefaultLexiconReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, VArray *segments, int32_t seg_tick) {
    // Init.
    LexReader_init((LexiconReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    DefaultLexiconReaderIVARS *const ivars = DefLexReader_IVARS(self);
    Segment *segment = DefLexReader_Get_Segment(self);

    // Build an array of SegLexicon objects -- one per field with lexicon
    // data, stored at the slot matching its field number (numbering
    // starts at 1).
    ivars->lexicons = VA_new(Schema_Num_Fields(schema));
    uint32_t last_field_num = Schema_Num_Fields(schema) + 1;
    for (uint32_t field_num = 1; field_num < last_field_num; field_num++) {
        String *field = Seg_Field_Name(segment, field_num);
        if (field && S_has_data(schema, folder, segment, field)) {
            SegLexicon *lexicon = SegLex_new(schema, folder, segment, field);
            VA_Store(ivars->lexicons, field_num, (Obj*)lexicon);
        }
    }

    return self;
}
DefaultLexiconReader*
DefLexReader_init(DefaultLexiconReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, VArray *segments, i32_t seg_tick) {
    Segment *segment;
    u32_t field_num, last_field_num;

    /* Init. */
    LexReader_init((LexiconReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    segment = DefLexReader_Get_Segment(self);

    /* Build an array of SegLexicon objects -- one per field with lexicon
     * data, stored at the slot matching its field number (numbering
     * starts at 1). */
    self->lexicons = VA_new(Schema_Num_Fields(schema));
    last_field_num = Schema_Num_Fields(schema) + 1;
    for (field_num = 1; field_num < last_field_num; field_num++) {
        CharBuf *field = Seg_Field_Name(segment, field_num);
        if (field && S_has_data(schema, folder, segment, field)) {
            SegLexicon *lexicon = SegLex_new(schema, folder, segment, field);
            VA_Store(self->lexicons, field_num, (Obj*)lexicon);
        }
    }

    return self;
}
void
NOTQuery_set_negated_query(NOTQuery *self, Query *negated_query) {
    /* Place a fresh reference to the query in child slot 0; VA_Store
     * handles whatever previously occupied that slot. */
    Obj *refcounted = INCREF(negated_query);
    VA_Store(self->children, 0, refcounted);
}
void
NOTQuery_set_negated_query(NOTQuery *self, Query *negated_query) {
    /* Place a fresh reference to the query in child slot 0; VA_Store
     * handles whatever previously occupied that slot. */
    NOTQueryIVARS *const ivars = NOTQuery_IVARS(self);
    Obj *refcounted = INCREF(negated_query);
    VA_Store(ivars->children, 0, refcounted);
}
// Collect one hit: if it is competitive, record its doc id, score, and
// sort values in the recycled "bumped" MatchDoc and offer it to the hit
// queue, then prepare a MatchDoc for the next collection.
void
SortColl_collect(SortCollector *self, int32_t doc_id) {
    SortCollectorIVARS *const ivars = SortColl_IVARS(self);

    // Add to the total number of hits.
    ivars->total_hits++;

    // Collect this hit if it's competitive.
    if (SI_competitive(ivars, doc_id)) {
        MatchDoc *const match_doc = ivars->bumped;
        MatchDocIVARS *const match_doc_ivars = MatchDoc_IVARS(match_doc);
        // Convert the segment-local doc id to an index-wide doc id.
        match_doc_ivars->doc_id = doc_id + ivars->base;

        // F32_NEGINF marks a score slot not yet filled for this hit.
        if (ivars->need_score && match_doc_ivars->score == F32_NEGINF) {
            match_doc_ivars->score = Matcher_Score(ivars->matcher);
        }

        // Fetch values so that cross-segment sorting can work.
        if (ivars->need_values) {
            VArray *values = match_doc_ivars->values;

            for (uint32_t i = 0, max = ivars->num_rules; i < max; i++) {
                SortCache *cache = ivars->sort_caches[i];
                // Pull out any leftover value object so it can be reused
                // as the "blank" for this rule.
                Obj *old_val = (Obj*)VA_Delete(values, i);
                if (cache) {
                    int32_t ord = SortCache_Ordinal(cache, doc_id);
                    Obj *blank = old_val
                                 ? old_val
                                 : SortCache_Make_Blank(cache);
                    Obj *val = SortCache_Value(cache, ord, blank);
                    if (val) {
                        VA_Store(values, i, (Obj*)val);
                    }
                    else {
                        DECREF(blank);
                    }
                }
            }
        }

        // Insert the new MatchDoc.  HitQ_Jostle returns a displaced
        // MatchDoc (possibly the one just offered) or NULL when the
        // queue still has room.
        ivars->bumped = (MatchDoc*)HitQ_Jostle(ivars->hit_q, (Obj*)match_doc);

        if (ivars->bumped) {
            if (ivars->bumped == match_doc) {
                /* The queue is full, and we have established a threshold for
                 * this segment as to what sort of document is definitely not
                 * acceptable.  Turn off AUTO_ACCEPT and start actually
                 * testing whether hits are competitive. */
                ivars->bubble_score = match_doc_ivars->score;
                ivars->bubble_doc = doc_id;
                ivars->actions = ivars->derived_actions;
            }

            // Recycle.  Reset the score sentinel so the next hit is scored
            // fresh (or marked as not-scored).
            MatchDoc_IVARS(ivars->bumped)->score = ivars->need_score
                                                   ? F32_NEGINF
                                                   : F32_NAN;
        }
        else {
            // The queue isn't full yet, so create a fresh MatchDoc.
            VArray *values = ivars->need_values
                             ? VA_new(ivars->num_rules)
                             : NULL;
            float fake_score = ivars->need_score ? F32_NEGINF : F32_NAN;
            ivars->bumped = MatchDoc_new(INT32_MAX, fake_score, values);
            DECREF(values);
        }
    }
}