PolyReader*
PolyReader_init(PolyReader *self, Schema *schema, Folder *folder,
                Snapshot *snapshot, IndexManager *manager,
                VArray *sub_readers) {
    PolyReaderIVARS *const ivars = PolyReader_IVARS(self);

    // Start with an empty doc space; S_init_sub_readers fills these in.
    ivars->doc_max   = 0;
    ivars->del_count = 0;

    if (sub_readers == NULL) {
        // No segments supplied: initialize as an empty reader.
        IxReader_init((IndexReader*)self, schema, folder, snapshot, NULL,
                      -1, manager);
        ivars->sub_readers = VA_new(0);
        ivars->offsets     = I32Arr_new_steal(NULL, 0);
        return self;
    }

    // Collect one Segment per SegReader, hand them to the parent
    // constructor, then wire up the sub-readers.
    uint32_t num_segs = VA_Get_Size(sub_readers);
    VArray  *segments = VA_new(num_segs);
    for (uint32_t i = 0; i < num_segs; i++) {
        SegReader *seg_reader
            = (SegReader*)CERTIFY(VA_Fetch(sub_readers, i), SEGREADER);
        VA_Push(segments, INCREF(SegReader_Get_Segment(seg_reader)));
    }
    IxReader_init((IndexReader*)self, schema, folder, snapshot, segments,
                  -1, manager);
    DECREF(segments);
    S_init_sub_readers(self, sub_readers);

    return self;
}
/* Recursively delete the directory tree at `path`.  Returns true if the
 * entry (and everything under it) was removed, false otherwise.  Deleting
 * the Folder's own root (empty path) is disallowed. */
bool_t
Folder_delete_tree(Folder *self, const CharBuf *path) {
    Folder *enclosing_folder = Folder_Enclosing_Folder(self, path);

    // Don't allow Folder to delete itself.
    if (!path || !CB_Get_Size(path)) {
        return false;
    }

    // Return failure if the entry wasn't there in the first place.
    if (!enclosing_folder) {
        return false;
    }

    ZombieCharBuf *local = IxFileNames_local_part(path, ZCB_BLANK());
    if (Folder_Local_Is_Directory(enclosing_folder, (CharBuf*)local)) {
        Folder *inner_folder
            = Folder_Local_Find_Folder(enclosing_folder, (CharBuf*)local);
        DirHandle *dh = Folder_Local_Open_Dir(inner_folder);
        if (dh) {
            VArray *files = VA_new(20);
            VArray *dirs  = VA_new(20);
            CharBuf *entry = DH_Get_Entry(dh);

            // Snapshot the directory listing, collecting subdirectories
            // separately (symlinks excluded so we don't recurse outside
            // the tree).
            while (DH_Next(dh)) {
                VA_Push(files, (Obj*)CB_Clone(entry));
                if (DH_Entry_Is_Dir(dh) && !DH_Entry_Is_Symlink(dh)) {
                    VA_Push(dirs, (Obj*)CB_Clone(entry));
                }
            }

            // Recurse into subdirectories first...
            for (uint32_t i = 0, max = VA_Get_Size(dirs); i < max; i++) {
                // Fix: fetch from `dirs`, not `files`.  The two arrays are
                // not parallel (`files` holds every entry), so indexing
                // `files` with a `dirs` index recursed into the wrong
                // entries and left real subdirectories behind.
                CharBuf *name = (CharBuf*)VA_Fetch(dirs, i);
                bool_t success = Folder_Delete_Tree(inner_folder, name);
                if (!success && Folder_Local_Exists(inner_folder, name)) {
                    break;
                }
            }

            // ... then delete the remaining plain entries.
            for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
                CharBuf *name = (CharBuf*)VA_Fetch(files, i);
                bool_t success = Folder_Local_Delete(inner_folder, name);
                if (!success && Folder_Local_Exists(inner_folder, name)) {
                    break;
                }
            }

            DECREF(dirs);
            DECREF(files);
            DECREF(dh);
        }
    }

    // Finally, remove the (now empty) entry itself.
    return Folder_Local_Delete(enclosing_folder, (CharBuf*)local);
}
PolyPostingList*
PolyPList_init(PolyPostingList *self, const CharBuf *field, VArray *readers,
               I32Array *starts) {
    const u32_t num_readers = VA_Get_Size(readers);
    u32_t i;

    /* Begin with an empty iteration state. */
    self->tick    = 0;
    self->current = NULL;

    /* Keep a private copy of the field name. */
    self->field = CB_Clone(field);

    /* Collect one sub-list per reader, offsetting each into the composite
     * doc-id space via its segment start. */
    self->sub_plists = VA_new(num_readers);
    for (i = 0; i < num_readers; i++) {
        PostingsReader *const post_reader = (PostingsReader*)ASSERT_IS_A(
            VA_Fetch(readers, i), POSTINGSREADER);
        i32_t offset = I32Arr_Get(starts, i);
        SegPostingList *sub_plist = (SegPostingList*)PostReader_Posting_List(
            post_reader, field, NULL);
        /* Readers without postings for this field contribute nothing. */
        if (sub_plist != NULL) {
            ASSERT_IS_A(sub_plist, SEGPOSTINGLIST);
            SegPList_Set_Doc_Base(sub_plist, offset);
            VA_Push(self->sub_plists, (Obj*)sub_plist);
        }
    }
    self->num_subs = VA_Get_Size(self->sub_plists);

    return self;
}
// Build a SeriesMatcher over `doc_ids`, partitioning them into one
// BitVecMatcher per segment as delimited by `offsets`.
static SeriesMatcher*
S_make_series_matcher(I32Array *doc_ids, I32Array *offsets, int32_t doc_max) {
    int32_t num_doc_ids  = I32Arr_Get_Size(doc_ids);
    int32_t num_matchers = I32Arr_Get_Size(offsets);
    VArray *matchers = VA_new(num_matchers);
    int32_t tick = 0;

    // Divvy up doc_ids by segment into BitVectors.
    for (int32_t seg = 0; seg < num_matchers; seg++) {
        int32_t offset = I32Arr_Get(offsets, seg);
        // The last segment is bounded by doc_max; the rest by the next
        // segment's start offset.
        int32_t bound = seg == num_matchers - 1
                        ? doc_max + 1
                        : I32Arr_Get(offsets, seg + 1);
        BitVector *bit_vec = BitVec_new(bound - offset);
        while (tick < num_doc_ids) {
            int32_t doc_id = I32Arr_Get(doc_ids, tick);
            if (doc_id > bound) { break; }
            tick++;
            BitVec_Set(bit_vec, doc_id - offset);
        }
        VA_Push(matchers, (Obj*)BitVecMatcher_new(bit_vec));
        DECREF(bit_vec);
    }

    SeriesMatcher *series_matcher = SeriesMatcher_new(matchers, offsets);
    DECREF(matchers);
    return series_matcher;
}
void
BBSortEx_flush(BBSortEx *self) {
    const u32_t cache_count = self->cache_max - self->cache_tick;
    VArray *elems;
    BBSortExRun *run;
    u32_t i;

    /* Nothing cached, nothing to flush. */
    if (cache_count == 0) { return; }
    elems = VA_new(cache_count);

    /* Sort the cache, then move its elements into a new run.  VA_Push
     * takes over the refcounts held by the cache slots. */
    BBSortEx_Sort_Cache(self);
    for (i = self->cache_tick; i < self->cache_max; i++) {
        VA_Push(elems, self->cache[i]);
    }
    run = BBSortExRun_new(elems);
    DECREF(elems);
    BBSortEx_Add_Run(self, (SortExRun*)run);
    DECREF(run);

    /* Reset the cache bookkeeping. */
    self->cache_tick += cache_count;
    SortEx_Clear_Cache(self);
}
VArray*
HeatMap_generate_proximity_boosts(HeatMap *self, VArray *spans) {
    VArray *boosts = VA_new(0);
    const uint32_t num_spans = VA_Get_Size(spans);

    // Score each ordered pair of spans.  The inner loop stops at the first
    // partner whose proximity boost is zero.  (The num_spans > 1 test also
    // guards the unsigned num_spans - 1 against wraparound.)
    for (uint32_t i = 0; num_spans > 1 && i < num_spans - 1; i++) {
        Span *span1 = (Span*)VA_Fetch(spans, i);
        for (uint32_t j = i + 1; j < num_spans; j++) {
            Span *span2 = (Span*)VA_Fetch(spans, j);
            float prox_score
                = HeatMap_Calc_Proximity_Boost(self, span1, span2);
            if (prox_score == 0) { break; }
            // The boost span covers from the start of span1 through the
            // end of span2.
            int32_t length = (span2->offset - span1->offset) + span2->length;
            VA_Push(boosts,
                    (Obj*)Span_new(span1->offset, length, prox_score));
        }
    }
    return boosts;
}
VArray*
TermCompiler_Highlight_Spans_IMP(TermCompiler *self, Searcher *searcher,
                                 DocVector *doc_vec, String *field) {
    TermCompilerIVARS *const ivars = TermCompiler_IVARS(self);
    TermQueryIVARS *const parent_ivars
        = TermQuery_IVARS((TermQuery*)ivars->parent);
    VArray *spans = VA_new(0);
    UNUSED_VAR(searcher);

    // Wrong field: no spans.
    if (!Str_Equals(parent_ivars->field, (Obj*)field)) {
        return spans;
    }

    // No term vector for this term: no spans either.
    TermVector *term_vector
        = DocVec_Term_Vector(doc_vec, field, (String*)parent_ivars->term);
    if (!term_vector) {
        return spans;
    }

    // One Span per occurrence, scored with this compiler's weight.
    I32Array *starts = TV_Get_Start_Offsets(term_vector);
    I32Array *ends   = TV_Get_End_Offsets(term_vector);
    for (uint32_t i = 0, max = I32Arr_Get_Size(starts); i < max; i++) {
        int32_t start  = I32Arr_Get(starts, i);
        int32_t length = I32Arr_Get(ends, i) - start;
        VA_Push(spans,
                (Obj*)Span_new(start, length, TermCompiler_Get_Weight(self)));
    }

    DECREF(term_vector);
    return spans;
}
// Create a test data structure including at least one each of Hash, VArray, // and CharBuf. static Obj* S_make_dump() { Hash *dump = Hash_new(0); Hash_Store_Str(dump, "foo", 3, (Obj*)CB_newf("foo")); Hash_Store_Str(dump, "stuff", 5, (Obj*)VA_new(0)); return (Obj*)dump; }
VArray*
SegReader_seg_readers(SegReader *self) {
    // A SegReader is its own (single-element) list of segment readers.
    VArray *singleton = VA_new(1);
    VA_Push(singleton, INCREF(self));
    return singleton;
}
// Create all the spans needed by HeatMap_Flatten_Spans, based on the source // offsets and lengths... but leave the scores at 0. static VArray* S_flattened_but_empty_spans(VArray *spans) { const uint32_t num_spans = VA_Get_Size(spans); int32_t *bounds = (int32_t*)MALLOCATE((num_spans * 2) * sizeof(int32_t)); // Assemble a list of all unique start/end boundaries. for (uint32_t i = 0; i < num_spans; i++) { Span *span = (Span*)VA_Fetch(spans, i); bounds[i] = span->offset; bounds[i + num_spans] = span->offset + span->length; } Sort_quicksort(bounds, num_spans * 2, sizeof(uint32_t), S_compare_i32, NULL); uint32_t num_bounds = 0; int32_t last = I32_MAX; for (uint32_t i = 0; i < num_spans * 2; i++) { if (bounds[i] != last) { bounds[num_bounds++] = bounds[i]; last = bounds[i]; } } // Create one Span for each zone between two bounds. VArray *flattened = VA_new(num_bounds - 1); for (uint32_t i = 0; i < num_bounds - 1; i++) { int32_t start = bounds[i]; int32_t length = bounds[i + 1] - start; VA_Push(flattened, (Obj*)Span_new(start, length, 0.0f)); } FREEMEM(bounds); return flattened; }
static void
test_calc_proximity_boost(TestBatchRunner *runner) {
    VArray *spans = VA_new(0);
    HeatMap *heat_map = HeatMap_new(spans, 133);

    // Probe spans at increasing distances from span1.
    Span *span1 = Span_new(0, 10, 1.0f);
    Span *span2 = Span_new(10, 10, 1.0f);   // abuts span1
    Span *span3 = Span_new(5, 4, 1.0f);     // overlaps span1
    Span *span4 = Span_new(100, 10, 1.0f);  // farther away
    Span *span5 = Span_new(150, 10, 1.0f);  // presumably past the window

    float big_boost     = HeatMap_Calc_Proximity_Boost(heat_map, span1, span2);
    float eq_big_boost  = HeatMap_Calc_Proximity_Boost(heat_map, span1, span3);
    float smaller_boost = HeatMap_Calc_Proximity_Boost(heat_map, span1, span4);
    float zero_boost    = HeatMap_Calc_Proximity_Boost(heat_map, span1, span5);

    TEST_TRUE(runner, big_boost == eq_big_boost,
              "overlapping and abutting produce the same proximity boost");
    TEST_TRUE(runner, big_boost > smaller_boost, "closer is better");
    TEST_TRUE(runner, zero_boost == 0.0,
              "distance outside of window yields no prox boost");

    DECREF(span1);
    DECREF(span2);
    DECREF(span3);
    DECREF(span4);
    DECREF(span5);
    DECREF(heat_map);
    DECREF(spans);
}
// Default to sort-by-score-then-doc-id. static VArray* S_default_sort_rules() { VArray *rules = VA_new(1); VA_Push(rules, (Obj*)SortRule_new(SortRule_SCORE, NULL, false)); VA_Push(rules, (Obj*)SortRule_new(SortRule_DOC_ID, NULL, false)); return rules; }
RAMFileDes*
RAMFileDes_init(RAMFileDes *self, const CharBuf *path) {
    FileDes_init((FileDes*)self, path);
    // Begin with an empty buffer list and zero logical length.
    self->buffers = VA_new(1);
    self->len     = 0;
    return self;
}
VArray*
NOTCompiler_highlight_spans(NOTCompiler *self, Searcher *searcher,
                            DocVector *doc_vec, const CharBuf *field) {
    // A negated clause never contributes highlight spans.
    UNUSED_VAR(self);
    UNUSED_VAR(searcher);
    UNUSED_VAR(doc_vec);
    UNUSED_VAR(field);
    return VA_new(0);
}
VArray*
Hash_Values_IMP(Hash *self) {
    VArray *values = VA_new(self->size);
    Obj *key;
    Obj *val;

    // Walk every entry, collecting a new reference to each value.
    Hash_Iterate(self);
    while (Hash_Next(self, &key, &val)) {
        VA_Push(values, INCREF(val));
    }
    return values;
}
VArray*
HeatMap_Flatten_Spans_IMP(HeatMap *self, VArray *spans) {
    const uint32_t num_spans = VA_Get_Size(spans);
    UNUSED_VAR(self);

    if (!num_spans) {
        // No input spans: an empty result.
        return VA_new(0);
    }
    else {
        // Build zero-scored spans covering every zone between consecutive
        // unique boundaries of the input spans.
        VArray *flattened = S_flattened_but_empty_spans(spans);
        const uint32_t num_raw_flattened = VA_Get_Size(flattened);

        // Iterate over each of the source spans, contributing their scores to
        // any destination span that falls within range.  `dest_tick` is a
        // cursor into `flattened` that only moves forward — this relies on
        // source spans being ordered by offset (they come from the same
        // boundary set).  NOTE(review): ordering assumption inferred from
        // the cursor reuse — confirm against callers.
        uint32_t dest_tick = 0;
        for (uint32_t i = 0; i < num_spans; i++) {
            Span *source_span = (Span*)VA_Fetch(spans, i);
            int32_t source_span_offset = Span_Get_Offset(source_span);
            int32_t source_span_len = Span_Get_Length(source_span);
            int32_t source_span_end = source_span_offset + source_span_len;

            // Get the location of the flattened span that shares the source
            // span's offset.
            for (; dest_tick < num_raw_flattened; dest_tick++) {
                Span *dest_span = (Span*)VA_Fetch(flattened, dest_tick);
                if (Span_Get_Offset(dest_span) == source_span_offset) {
                    break;
                }
            }

            // Fill in scores: add the source span's weight to every
            // flattened span up to (but not including) the one that starts
            // at the source span's end.
            for (uint32_t j = dest_tick; j < num_raw_flattened; j++) {
                Span *dest_span = (Span*)VA_Fetch(flattened, j);
                if (Span_Get_Offset(dest_span) == source_span_end) {
                    break;
                }
                else {
                    float new_weight = Span_Get_Weight(dest_span)
                                       + Span_Get_Weight(source_span);
                    Span_Set_Weight(dest_span, new_weight);
                }
            }
        }

        // Leave holes instead of spans that don't have any score: compact
        // the scored spans to the front in place (VA_Store consumes the
        // INCREF and releases whatever previously occupied the slot), then
        // chop off the leftover tail.
        dest_tick = 0;
        for (uint32_t i = 0; i < num_raw_flattened; i++) {
            Span *span = (Span*)VA_Fetch(flattened, i);
            if (Span_Get_Weight(span)) {
                VA_Store(flattened, dest_tick++, INCREF(span));
            }
        }
        VA_Excise(flattened, dest_tick, num_raw_flattened - dest_tick);

        return flattened;
    }
}
VArray*
VA_Gather_IMP(VArray *self, VA_Gather_Test_t test, void *data) {
    const uint32_t size = self->size;
    VArray *gathered = VA_new(size);

    // Keep, in order, every slot for which `test` answers true.  Undefined
    // (NULL) slots that pass the test are preserved as NULL.
    for (uint32_t i = 0; i < size; i++) {
        if (test(self, i, data)) {
            Obj *elem = self->elems[i];
            VA_Push(gathered, elem ? INCREF(elem) : NULL);
        }
    }
    return gathered;
}
MemoryPool*
MemPool_init(MemoryPool *self, uint32_t arena_size) {
    // A zero arena_size selects the default buffer size.
    self->arena_size = arena_size ? arena_size : DEFAULT_BUF_SIZE;
    self->arenas     = VA_new(16);
    self->tick       = -1;
    self->buf        = NULL;
    self->limit      = NULL;
    self->consumed   = 0;
    return self;
}
VArray*
Hash_Keys_IMP(Hash *self) {
    VArray *keys = VA_new(self->size);
    Obj *key;
    Obj *val;

    // Walk every entry, collecting a new reference to each key.
    Hash_Iterate(self);
    while (Hash_Next(self, &key, &val)) {
        VA_Push(keys, INCREF(key));
    }
    return keys;
}
// Verify Hash_Keys, Hash_Values, the Iterate/Next API, and Hash_Find_Key
// against a hash large enough to trigger multiple rebuilds.
static void
test_Keys_Values_Iter(TestBatch *batch) {
    Hash *hash = Hash_new(0); // trigger multiple rebuilds.
    VArray *expected = VA_new(100);
    VArray *keys;
    VArray *values;

    // Store 500 distinct entries where key == value.
    for (uint32_t i = 0; i < 500; i++) {
        CharBuf *cb = CB_newf("%u32", i);
        Hash_Store(hash, (Obj*)cb, (Obj*)cb);
        VA_Push(expected, INCREF(cb));
    }
    VA_Sort(expected, NULL, NULL);

    // Keys() and Values() return unordered snapshots; sort before comparing.
    keys = Hash_Keys(hash);
    values = Hash_Values(hash);
    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "Keys");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "Values");
    VA_Clear(keys);
    VA_Clear(values);

    // Same check via the Iterate/Next interface.
    {
        Obj *key;
        Obj *value;
        Hash_Iterate(hash);
        while (Hash_Next(hash, &key, &value)) {
            VA_Push(keys, INCREF(key));
            VA_Push(values, INCREF(value));
        }
    }
    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "Keys from Iter");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "Values from Iter");

    {
        ZombieCharBuf *forty = ZCB_WRAP_STR("40", 2);
        ZombieCharBuf *nope  = ZCB_WRAP_STR("nope", 4);
        Obj *key = Hash_Find_Key(hash, (Obj*)forty, ZCB_Hash_Sum(forty));
        TEST_TRUE(batch, Obj_Equals(key, (Obj*)forty), "Find_Key");
        // Fix: this statement previously ended with a stray comma, fusing
        // it and the following TEST_TRUE into one comma expression.
        key = Hash_Find_Key(hash, (Obj*)nope, ZCB_Hash_Sum(nope));
        TEST_TRUE(batch, key == NULL,
                  "Find_Key returns NULL for non-existent key");
    }

    DECREF(hash);
    DECREF(expected);
    DECREF(keys);
    DECREF(values);
}
VArray*
VA_dump(VArray *self) {
    VArray *dump = VA_new(self->size);
    u32_t i, max;

    /* Dump each defined element; NULL slots remain holes in the output. */
    for (i = 0, max = self->size; i < max; i++) {
        Obj *elem = VA_Fetch(self, i);
        if (elem != NULL) {
            VA_Store(dump, i, Obj_Dump(elem));
        }
    }
    return dump;
}
static void
test_analysis(TestBatchRunner *runner) {
    CaseFolder *case_folder = CaseFolder_new();
    String *source = Str_newf("caPiTal ofFensE");
    VArray *wanted = VA_new(1);

    // The mixed-case input should come out lowercased.
    VA_Push(wanted, (Obj*)Str_newf("capital offense"));
    TestUtils_test_analyzer(runner, (Analyzer*)case_folder, source, wanted,
                            "lowercase plain text");

    DECREF(wanted);
    DECREF(source);
    DECREF(case_folder);
}
SegWriter*
SegWriter_init(SegWriter *self, Schema *schema, Snapshot *snapshot,
               Segment *segment, PolyReader *polyreader) {
    Architecture *arch = Schema_Get_Architecture(schema);
    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);

    SegWriterIVARS *const ivars = SegWriter_IVARS(self);
    ivars->by_api   = Hash_new(0);
    ivars->inverter = Inverter_new(schema, segment);
    ivars->writers  = VA_new(16);

    // Let the Architecture install its component DataWriters.
    Arch_Init_Seg_Writer(arch, self);
    return self;
}
Inverter*
Inverter_init(Inverter *self, Schema *schema, Segment *segment) {
    InverterIVARS *const ivars = Inverter_IVARS(self);

    // Init: start on the blank entry with no doc loaded.
    ivars->tick    = -1;
    ivars->doc     = NULL;
    ivars->sorted  = false;
    ivars->blank   = InvEntry_new(NULL, NULL, 0);
    ivars->current = ivars->blank;

    // Derive: size both arrays by the number of known fields.
    ivars->entry_pool = VA_new(Schema_Num_Fields(schema));
    ivars->entries    = VA_new(Schema_Num_Fields(schema));

    // Assign.
    ivars->schema  = (Schema*)INCREF(schema);
    ivars->segment = (Segment*)INCREF(segment);

    return self;
}
// Wire up `sub_readers` as this PolyReader's segment readers: record start
// offsets and total doc count, then build one aggregated DataReader per
// component API shared by the segments.
static void
S_init_sub_readers(PolyReader *self, VArray *sub_readers) {
    PolyReaderIVARS *const ivars = PolyReader_IVARS(self);
    uint32_t num_sub_readers = VA_Get_Size(sub_readers);
    // Ownership of `starts` transfers to the I32Array below via
    // I32Arr_new_steal — do not free it here.
    int32_t *starts = (int32_t*)MALLOCATE(num_sub_readers * sizeof(int32_t));
    Hash *data_readers = Hash_new(0);

    // Replace any previously installed sub-readers/offsets.
    DECREF(ivars->sub_readers);
    DECREF(ivars->offsets);
    ivars->sub_readers = (VArray*)INCREF(sub_readers);

    // Accumulate doc_max, subreader start offsets, and DataReaders.
    ivars->doc_max = 0;
    for (uint32_t i = 0; i < num_sub_readers; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(sub_readers, i);
        Hash *components = SegReader_Get_Components(seg_reader);
        CharBuf *api;
        DataReader *component;
        starts[i] = ivars->doc_max;
        ivars->doc_max += SegReader_Doc_Max(seg_reader);
        // Group each segment's component readers by API name; VA_Store at
        // index i keeps the per-API arrays parallel to `sub_readers`
        // (missing components leave NULL holes).
        Hash_Iterate(components);
        while (Hash_Next(components, (Obj**)&api, (Obj**)&component)) {
            VArray *readers = (VArray*)Hash_Fetch(data_readers, (Obj*)api);
            if (!readers) {
                readers = VA_new(num_sub_readers);
                Hash_Store(data_readers, (Obj*)api, (Obj*)readers);
            }
            VA_Store(readers, i, INCREF(component));
        }
    }
    ivars->offsets = I32Arr_new_steal(starts, num_sub_readers);

    // For each API, ask a representative reader to build an aggregator over
    // the whole group.  A NULL aggregator means the API opts out.
    CharBuf *api;
    VArray *readers;
    Hash_Iterate(data_readers);
    while (Hash_Next(data_readers, (Obj**)&api, (Obj**)&readers)) {
        DataReader *datareader
            = (DataReader*)CERTIFY(S_first_non_null(readers), DATAREADER);
        DataReader *aggregator
            = DataReader_Aggregator(datareader, readers, ivars->offsets);
        if (aggregator) {
            CERTIFY(aggregator, DATAREADER);
            Hash_Store(ivars->components, (Obj*)api, (Obj*)aggregator);
        }
    }
    DECREF(data_readers);

    // Cache the total deletion count, if a DeletionsReader was aggregated.
    DeletionsReader *del_reader = (DeletionsReader*)Hash_Fetch(
        ivars->components, (Obj*)VTable_Get_Name(DELETIONSREADER));
    ivars->del_count = del_reader ? DelReader_Del_Count(del_reader) : 0;
}
VArray*
BBSortEx_Peek_Cache_IMP(BBSortEx *self) {
    BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
    Obj **buffer = ivars->buffer;
    uint32_t count = ivars->buf_max - ivars->buf_tick;

    // Snapshot the live portion of the buffer, taking a new reference to
    // each element.
    VArray *snapshot = VA_new(count);
    for (uint32_t i = ivars->buf_tick; i < ivars->buf_max; ++i) {
        VA_Push(snapshot, INCREF(buffer[i]));
    }
    return snapshot;
}
// Serialize a one-element array holding `value` to JSON, parse it back, and
// confirm the round trip preserved it.
static void
S_round_trip_integer(TestBatch *batch, int64_t value) {
    Integer64 *boxed = Int64_new(value);
    VArray *arr = VA_new(1);
    VA_Store(arr, 0, (Obj*)boxed);

    CharBuf *json = Json_to_json((Obj*)arr);
    Obj *dump = Json_from_json(json);
    TEST_TRUE(batch, VA_Equals(arr, dump), "Round trip integer %ld",
              (long)value);

    DECREF(dump);
    DECREF(json);
    DECREF(arr);
}
Matcher*
PhraseCompiler_make_matcher(PhraseCompiler *self, SegReader *reader,
                            bool_t need_score) {
    PostingsReader *const post_reader = (PostingsReader*)SegReader_Fetch(
        reader, POSTINGSREADER.name);
    PhraseQuery *const parent = (PhraseQuery*)self->parent;
    VArray *const terms = parent->terms;
    u32_t num_terms = VA_Get_Size(terms);
    Schema *schema = SegReader_Get_Schema(reader);
    Posting *posting = Schema_Fetch_Posting(schema, parent->field);
    VArray *plists;
    Matcher *retval;
    u32_t i;
    UNUSED_VAR(need_score);

    /* Bail if there are no terms. */
    if (!num_terms) { return NULL; }

    /* Bail unless field is valid and posting type supports positions. */
    if (posting == NULL || !OBJ_IS_A(posting, SCOREPOSTING)) { return NULL; }

    /* Bail if there's no PostingsReader for this segment. */
    if (!post_reader) { return NULL; }

    /* Look up each term; give up if any one is missing from the index. */
    plists = VA_new(num_terms);
    for (i = 0; i < num_terms; i++) {
        Obj *term = VA_Fetch(terms, i);
        PostingList *plist
            = PostReader_Posting_List(post_reader, parent->field, term);
        if (!plist || !PList_Get_Doc_Freq(plist)) {
            DECREF(plist);
            DECREF(plists);
            return NULL;
        }
        VA_Push(plists, (Obj*)plist);
    }

    retval = (Matcher*)PhraseScorer_new(Compiler_Get_Similarity(self),
                                        plists, (Compiler*)self);
    DECREF(plists);
    return retval;
}
MemoryPool*
MemPool_new(u32_t arena_size) {
    MemoryPool *self = (MemoryPool*)VTable_Make_Obj(&MEMORYPOOL);
    /* A zero arena_size selects the default buffer size. */
    self->arena_size = arena_size ? arena_size : DEFAULT_BUF_SIZE;
    self->arenas     = VA_new(16);
    self->tick       = -1;
    self->buf        = NULL;
    self->limit      = NULL;
    self->consumed   = 0;
    return self;
}
Matcher*
ProximityCompiler_Make_Matcher_IMP(ProximityCompiler *self, SegReader *reader,
                                   bool need_score) {
    ProximityCompilerIVARS *const ivars = ProximityCompiler_IVARS(self);
    UNUSED_VAR(need_score);
    ProximityQueryIVARS *const parent_ivars
        = ProximityQuery_IVARS((ProximityQuery*)ivars->parent);
    VArray *const terms = parent_ivars->terms;
    uint32_t num_terms = VA_Get_Size(terms);

    // An empty term list can never match.
    if (num_terms == 0) { return NULL; }

    // The field must be valid and its posting type must support positions.
    Similarity *sim = ProximityCompiler_Get_Similarity(self);
    Posting *prototype = Sim_Make_Posting(sim);
    bool positions_ok
        = prototype != NULL && Obj_Is_A((Obj*)prototype, SCOREPOSTING);
    DECREF(prototype);
    if (!positions_ok) { return NULL; }

    // A PostingListReader for this segment is required.
    PostingListReader *const plist_reader
        = (PostingListReader*)SegReader_Fetch(
              reader, Class_Get_Name(POSTINGLISTREADER));
    if (!plist_reader) { return NULL; }

    // Look up each term; give up if any one is absent from the segment.
    VArray *plists = VA_new(num_terms);
    for (uint32_t i = 0; i < num_terms; i++) {
        Obj *term = VA_Fetch(terms, i);
        PostingList *plist = PListReader_Posting_List(
            plist_reader, parent_ivars->field, term);
        if (!plist || !PList_Get_Doc_Freq(plist)) {
            DECREF(plist);
            DECREF(plists);
            return NULL;
        }
        VA_Push(plists, (Obj*)plist);
    }

    Matcher *matcher = (Matcher*)ProximityMatcher_new(sim, plists,
                                                      (Compiler*)self,
                                                      ivars->within);
    DECREF(plists);
    return matcher;
}