// Default to sort-by-score-then-doc-id. static VArray* S_default_sort_rules() { VArray *rules = VA_new(1); VA_Push(rules, (Obj*)SortRule_new(SortRule_SCORE, NULL, false)); VA_Push(rules, (Obj*)SortRule_new(SortRule_DOC_ID, NULL, false)); return rules; }
// Exercise Hash_Keys, Hash_Values, the Iterate/Next protocol, and Find_Key.
static void
test_Keys_Values_Iter(TestBatch *batch) {
    Hash   *hash     = Hash_new(0); // trigger multiple rebuilds.
    VArray *expected = VA_new(100);
    VArray *keys;
    VArray *values;

    // Store each stringified number under itself; remember it for later.
    for (uint32_t i = 0; i < 500; i++) {
        CharBuf *cb = CB_newf("%u32", i);
        Hash_Store(hash, (Obj*)cb, (Obj*)cb);
        VA_Push(expected, INCREF(cb));
    }

    VA_Sort(expected, NULL, NULL);

    keys   = Hash_Keys(hash);
    values = Hash_Values(hash);
    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "Keys");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "Values");
    VA_Clear(keys);
    VA_Clear(values);

    {
        Obj *key;
        Obj *value;
        Hash_Iterate(hash);
        while (Hash_Next(hash, &key, &value)) {
            VA_Push(keys, INCREF(key));
            VA_Push(values, INCREF(value));
        }
    }
    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "Keys from Iter");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "Values from Iter");

    {
        ZombieCharBuf *forty = ZCB_WRAP_STR("40", 2);
        ZombieCharBuf *nope  = ZCB_WRAP_STR("nope", 4);
        Obj *key = Hash_Find_Key(hash, (Obj*)forty, ZCB_Hash_Sum(forty));
        TEST_TRUE(batch, Obj_Equals(key, (Obj*)forty), "Find_Key");
        // Fixed: this statement was previously joined to the following
        // TEST_TRUE by a stray comma operator instead of a semicolon.
        key = Hash_Find_Key(hash, (Obj*)nope, ZCB_Hash_Sum(nope));
        TEST_TRUE(batch, key == NULL,
                  "Find_Key returns NULL for non-existent key");
    }

    DECREF(hash);
    DECREF(expected);
    DECREF(keys);
    DECREF(values);
}
// Recursively delete the directory tree at `path`.  Returns true on
// success, false if the entry did not exist or could not be removed.
bool_t
Folder_delete_tree(Folder *self, const CharBuf *path) {
    // Don't allow Folder to delete itself.  (Guard moved before the
    // Enclosing_Folder lookup so a NULL path is never dereferenced.)
    if (!path || !CB_Get_Size(path)) {
        return false;
    }

    Folder *enclosing_folder = Folder_Enclosing_Folder(self, path);
    if (enclosing_folder) {
        ZombieCharBuf *local = IxFileNames_local_part(path, ZCB_BLANK());
        if (Folder_Local_Is_Directory(enclosing_folder, (CharBuf*)local)) {
            Folder *inner_folder
                = Folder_Local_Find_Folder(enclosing_folder, (CharBuf*)local);
            DirHandle *dh = Folder_Local_Open_Dir(inner_folder);
            if (dh) {
                VArray  *files = VA_new(20);
                VArray  *dirs  = VA_new(20);
                CharBuf *entry = DH_Get_Entry(dh);
                while (DH_Next(dh)) {
                    VA_Push(files, (Obj*)CB_Clone(entry));
                    if (DH_Entry_Is_Dir(dh) && !DH_Entry_Is_Symlink(dh)) {
                        VA_Push(dirs, (Obj*)CB_Clone(entry));
                    }
                }
                // Recurse into subdirectories first...
                for (uint32_t i = 0, max = VA_Get_Size(dirs); i < max; i++) {
                    // Fixed: this previously fetched from `files`, so the
                    // recursion ran against the wrong entry list.
                    CharBuf *name = (CharBuf*)VA_Fetch(dirs, i);
                    bool_t success = Folder_Delete_Tree(inner_folder, name);
                    if (!success && Folder_Local_Exists(inner_folder, name)) {
                        break;
                    }
                }
                // ... then delete the local entries.
                for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
                    CharBuf *name = (CharBuf*)VA_Fetch(files, i);
                    bool_t success = Folder_Local_Delete(inner_folder, name);
                    if (!success && Folder_Local_Exists(inner_folder, name)) {
                        break;
                    }
                }
                DECREF(dirs);
                DECREF(files);
                DECREF(dh);
            }
        }
        return Folder_Local_Delete(enclosing_folder, (CharBuf*)local);
    }
    else {
        // Return failure if the entry wasn't there in the first place.
        return false;
    }
}
// Produce one highlight Span per occurrence of the parent query's term
// within `field` of the supplied DocVector.  Returns an empty array when
// the field doesn't match or the doc has no term vector for the term.
VArray*
TermCompiler_Highlight_Spans_IMP(TermCompiler *self, Searcher *searcher,
                                 DocVector *doc_vec, String *field) {
    TermCompilerIVARS *const ivars = TermCompiler_IVARS(self);
    TermQueryIVARS *const parent_ivars
        = TermQuery_IVARS((TermQuery*)ivars->parent);
    VArray *spans = VA_new(0);
    UNUSED_VAR(searcher);

    // Wrong field: nothing to highlight.
    if (!Str_Equals(parent_ivars->field, (Obj*)field)) {
        return spans;
    }

    // No term vector for this term: nothing to highlight.
    TermVector *term_vector
        = DocVec_Term_Vector(doc_vec, field, (String*)parent_ivars->term);
    if (!term_vector) {
        return spans;
    }

    // Add all starts and ends.
    I32Array *starts = TV_Get_Start_Offsets(term_vector);
    I32Array *ends   = TV_Get_End_Offsets(term_vector);
    for (uint32_t i = 0, max = I32Arr_Get_Size(starts); i < max; i++) {
        int32_t start = I32Arr_Get(starts, i);
        int32_t len   = I32Arr_Get(ends, i) - start;
        VA_Push(spans,
                (Obj*)Span_new(start, len, TermCompiler_Get_Weight(self)));
    }

    DECREF(term_vector);
    return spans;
}
// Initialize a PolyReader either from an explicit list of SegReaders or,
// when `sub_readers` is NULL, as an empty reader.
PolyReader*
PolyReader_init(PolyReader *self, Schema *schema, Folder *folder,
                Snapshot *snapshot, IndexManager *manager,
                VArray *sub_readers) {
    PolyReaderIVARS *const ivars = PolyReader_IVARS(self);
    ivars->doc_max   = 0;
    ivars->del_count = 0;

    if (sub_readers) {
        // Harvest one Segment per SegReader (each element is certified),
        // then hand the segment list to the parent constructor.
        uint32_t num_segs = VA_Get_Size(sub_readers);
        VArray  *segments = VA_new(num_segs);
        for (uint32_t i = 0; i < num_segs; i++) {
            SegReader *seg_reader
                = (SegReader*)CERTIFY(VA_Fetch(sub_readers, i), SEGREADER);
            VA_Push(segments, INCREF(SegReader_Get_Segment(seg_reader)));
        }
        IxReader_init((IndexReader*)self, schema, folder, snapshot, segments,
                      -1, manager);
        DECREF(segments);
        S_init_sub_readers(self, sub_readers);
    }
    else {
        // No sub-readers supplied: start out empty.
        IxReader_init((IndexReader*)self, schema, folder, snapshot, NULL, -1,
                      manager);
        ivars->sub_readers = VA_new(0);
        ivars->offsets     = I32Arr_new_steal(NULL, 0);
    }

    return self;
}
// A SegReader is atomic: it is its own one and only sub-reader.
VArray*
SegReader_seg_readers(SegReader *self) {
    VArray *seg_readers = VA_new(1);
    VA_Push(seg_readers, INCREF(self));
    return seg_readers;
}
// Advance to the next arena, reusing an existing one when recycling or
// allocating a fresh one from the system, and reset buf/limit/consumed.
static void
S_init_arena(MemoryPool *self, size_t amount) {
    ByteBuf *bb;

    // Indicate which arena we're using at present.
    self->tick++;

    if (self->tick < (int32_t)VA_Get_Size(self->arenas)) {
        // Recycle mode: reuse previously acquired memory, growing the
        // arena if the request won't fit.
        bb = (ByteBuf*)VA_Fetch(self->arenas, self->tick);
        if (amount >= BB_Get_Size(bb)) {
            BB_Grow(bb, amount);
            BB_Set_Size(bb, amount);
        }
    }
    else {
        // Add mode: get more memory from the system.
        size_t buf_size = (amount + 1) > self->arena_size
                          ? (amount + 1)
                          : self->arena_size;
        char *ptr = (char*)MALLOCATE(buf_size);
        bb = BB_new_steal_bytes(ptr, buf_size - 1, buf_size);
        VA_Push(self->arenas, (Obj*)bb);
    }

    // Recalculate consumption to take into account blocked off space.
    self->consumed = 0;
    for (int32_t i = 0; i < self->tick; i++) {
        ByteBuf *used = (ByteBuf*)VA_Fetch(self->arenas, i);
        self->consumed += BB_Get_Size(used);
    }

    self->buf   = BB_Get_Buf(bb);
    self->limit = self->buf + BB_Get_Size(bb);
}
// For each ordered pair of spans, emit a boost Span covering both when the
// proximity score is non-zero; stop scanning a row at the first zero score.
VArray*
HeatMap_generate_proximity_boosts(HeatMap *self, VArray *spans) {
    VArray *boosts = VA_new(0);
    const uint32_t num_spans = VA_Get_Size(spans);

    if (num_spans > 1) {
        for (uint32_t i = 0, max = num_spans - 1; i < max; i++) {
            Span *span1 = (Span*)VA_Fetch(spans, i);
            for (uint32_t j = i + 1; j <= max; j++) {
                Span *span2 = (Span*)VA_Fetch(spans, j);
                float prox_score
                    = HeatMap_Calc_Proximity_Boost(self, span1, span2);
                if (prox_score == 0) {
                    break;
                }
                // Boost covers from the start of span1 to the end of span2.
                int32_t length = (span2->offset - span1->offset)
                                 + span2->length;
                VA_Push(boosts,
                        (Obj*)Span_new(span1->offset, length, prox_score));
            }
        }
    }

    return boosts;
}
// Derive an Inversion for the entry (analyzed, or a single whole-value
// token) and append the entry to this Inverter's list.
void
Inverter_Add_Field_IMP(Inverter *self, InverterEntry *entry) {
    InverterIVARS *const ivars = Inverter_IVARS(self);
    InverterEntryIVARS *const entry_ivars = InvEntry_IVARS(entry);

    // Get an Inversion, going through analyzer if appropriate.
    if (entry_ivars->analyzer) {
        DECREF(entry_ivars->inversion);
        entry_ivars->inversion
            = Analyzer_Transform_Text(entry_ivars->analyzer,
                                      (String*)entry_ivars->value);
        Inversion_Invert(entry_ivars->inversion);
    }
    else if (entry_ivars->indexed || entry_ivars->highlightable) {
        // No analyzer: the entire field value becomes one token.
        String *value     = (String*)entry_ivars->value;
        size_t  token_len = Str_Get_Size(value);
        Token  *seed = Token_new(Str_Get_Ptr8(value), token_len, 0,
                                 token_len, 1.0f, 1);
        DECREF(entry_ivars->inversion);
        entry_ivars->inversion = Inversion_new(seed);
        DECREF(seed);
        Inversion_Invert(entry_ivars->inversion); // Nearly a no-op.
    }

    // Prime the iterator.
    VA_Push(ivars->entries, INCREF(entry));
    ivars->sorted = false;
}
// Create all the spans needed by HeatMap_Flatten_Spans, based on the source // offsets and lengths... but leave the scores at 0. static VArray* S_flattened_but_empty_spans(VArray *spans) { const uint32_t num_spans = VA_Get_Size(spans); int32_t *bounds = (int32_t*)MALLOCATE((num_spans * 2) * sizeof(int32_t)); // Assemble a list of all unique start/end boundaries. for (uint32_t i = 0; i < num_spans; i++) { Span *span = (Span*)VA_Fetch(spans, i); bounds[i] = span->offset; bounds[i + num_spans] = span->offset + span->length; } Sort_quicksort(bounds, num_spans * 2, sizeof(uint32_t), S_compare_i32, NULL); uint32_t num_bounds = 0; int32_t last = I32_MAX; for (uint32_t i = 0; i < num_spans * 2; i++) { if (bounds[i] != last) { bounds[num_bounds++] = bounds[i]; last = bounds[i]; } } // Create one Span for each zone between two bounds. VArray *flattened = VA_new(num_bounds - 1); for (uint32_t i = 0; i < num_bounds - 1; i++) { int32_t start = bounds[i]; int32_t length = bounds[i + 1] - start; VA_Push(flattened, (Obj*)Span_new(start, length, 0.0f)); } FREEMEM(bounds); return flattened; }
// Split a sorted list of doc ids into per-segment BitVectors (one per
// offset) and wrap them in a SeriesMatcher.
static SeriesMatcher*
S_make_series_matcher(I32Array *doc_ids, I32Array *offsets, int32_t doc_max) {
    int32_t num_doc_ids  = I32Arr_Get_Size(doc_ids);
    int32_t num_matchers = I32Arr_Get_Size(offsets);
    VArray *matchers     = VA_new(num_matchers);
    int32_t tick         = 0;

    // Divvy up doc_ids by segment into BitVectors.
    for (int32_t i = 0; i < num_matchers; i++) {
        int32_t offset = I32Arr_Get(offsets, i);
        int32_t max    = i == num_matchers - 1
                         ? doc_max + 1
                         : I32Arr_Get(offsets, i + 1);
        BitVector *bit_vec = BitVec_new(max - offset);
        while (tick < num_doc_ids) {
            int32_t doc_id = I32Arr_Get(doc_ids, tick);
            if (doc_id > max) {
                break; // Belongs to a later segment.
            }
            tick++;
            BitVec_Set(bit_vec, doc_id - offset);
        }
        VA_Push(matchers, (Obj*)BitVecMatcher_new(bit_vec));
        DECREF(bit_vec);
    }

    SeriesMatcher *series_matcher = SeriesMatcher_new(matchers, offsets);
    DECREF(matchers);
    return series_matcher;
}
/* Sort whatever is sitting in the cache and spill it to a new run. */
void
BBSortEx_flush(BBSortEx *self) {
    u32_t cache_count = self->cache_max - self->cache_tick;
    VArray *elems;
    BBSortExRun *run;
    u32_t i;

    /* Nothing cached, nothing to flush. */
    if (!cache_count) { return; }
    elems = VA_new(cache_count);

    /* Sort, then create a new run. */
    BBSortEx_Sort_Cache(self);
    for (i = self->cache_tick; i < self->cache_max; i++) {
        VA_Push(elems, self->cache[i]);
    }
    run = BBSortExRun_new(elems);
    DECREF(elems);
    BBSortEx_Add_Run(self, (SortExRun*)run);
    DECREF(run);

    /* Blank the cache vars. */
    self->cache_tick += cache_count;
    SortEx_Clear_Cache(self);
}
/* Initialize a PolyPostingList by gathering one sub-posting-list per
 * reader (skipping readers with none) and assigning each its doc base. */
PolyPostingList*
PolyPList_init(PolyPostingList *self, const CharBuf *field, VArray *readers,
               I32Array *starts) {
    const u32_t num_readers = VA_Get_Size(readers);
    u32_t i;

    /* Init. */
    self->tick    = 0;
    self->current = NULL;

    /* Assign. */
    self->field = CB_Clone(field);

    /* Get sub-posting_lists and assign offsets. */
    self->sub_plists = VA_new(num_readers);
    for (i = 0; i < num_readers; i++) {
        PostingsReader *const post_reader = (PostingsReader*)ASSERT_IS_A(
            VA_Fetch(readers, i), POSTINGSREADER);
        i32_t offset = I32Arr_Get(starts, i);
        SegPostingList *sub_plist
            = (SegPostingList*)PostReader_Posting_List(post_reader, field,
                                                       NULL);
        if (sub_plist) {
            ASSERT_IS_A(sub_plist, SEGPOSTINGLIST);
            SegPList_Set_Doc_Base(sub_plist, offset);
            VA_Push(self->sub_plists, (Obj*)sub_plist);
        }
    }
    self->num_subs = VA_Get_Size(self->sub_plists);

    return self;
}
// %o with a NULL argument must render the placeholder "[NULL]".
static void
vcatf_null_obj(VArray *tests) {
    CharBuf *expected = S_get_cb("foo bar [NULL] baz");
    CharBuf *target   = S_get_cb("foo ");
    CB_catf(target, "bar %o baz", NULL);
    VA_Push(tests, (Obj*)TestCB_new(expected, target));
}
// %s appends a C string.
static void
vcatf_s(VArray *tests) {
    CharBuf *expected = S_get_cb("foo bar bizzle baz");
    CharBuf *target   = S_get_cb("foo ");
    CB_catf(target, "bar %s baz", "bizzle");
    VA_Push(tests, (Obj*)TestCB_new(expected, target));
}
// %i32 appends a signed 32-bit integer.
static void
vcatf_i32(VArray *tests) {
    CharBuf *expected = S_get_cb("foo bar -100000 baz");
    i32_t    value    = -100000;
    CharBuf *target   = S_get_cb("foo ");
    CB_catf(target, "bar %i32 baz", value);
    VA_Push(tests, (Obj*)TestCB_new(expected, target));
}
// %u64 appends an unsigned 64-bit integer (value exceeds 32-bit range).
static void
vcatf_u64(VArray *tests) {
    CharBuf *expected = S_get_cb("foo bar 5000000000 baz");
    u64_t    value    = U64_C(5000000000);
    CharBuf *target   = S_get_cb("foo ");
    CB_catf(target, "bar %u64 baz", value);
    VA_Push(tests, (Obj*)TestCB_new(expected, target));
}
// Return a new VArray holding one new reference to each value in the hash.
VArray*
Hash_Values_IMP(Hash *self) {
    Obj *key;
    Obj *val;
    VArray *values = VA_new(self->size);
    Hash_Iterate(self);
    while (Hash_Next(self, &key, &val)) {
        VA_Push(values, INCREF(val));
    }
    return values;
}
// %i8 appends a signed 8-bit integer.
static void
vcatf_i8(VArray *tests) {
    CharBuf *expected = S_get_cb("foo bar -3 baz");
    i8_t     value    = -3;
    CharBuf *target   = S_get_cb("foo ");
    CB_catf(target, "bar %i8 baz", value);
    VA_Push(tests, (Obj*)TestCB_new(expected, target));
}
// %o appends a CharBuf object's content.
static void
vcatf_cb(VArray *tests) {
    CharBuf *expected = S_get_cb("foo bar ZEKE baz");
    CharBuf *source   = S_get_cb("ZEKE");
    CharBuf *target   = S_get_cb("foo ");
    CB_catf(target, "bar %o baz", source);
    DECREF(source);
    VA_Push(tests, (Obj*)TestCB_new(expected, target));
}
// %o appends the string form of an arbitrary Obj (here, a LeafQuery).
static void
vcatf_obj(VArray *tests) {
    CharBuf   *expected = S_get_cb("ooga content:FOO booga");
    LeafQuery *query    = TestUtils_make_leaf_query("content", "FOO");
    CharBuf   *target   = S_get_cb("ooga");
    CB_catf(target, " %o booga", query);
    DECREF(query);
    VA_Push(tests, (Obj*)TestCB_new(expected, target));
}
// Initialize with one required and one optional child Matcher; both are
// also registered with the PolyMatcher parent.
RequiredOptionalMatcher*
ReqOptMatcher_init(RequiredOptionalMatcher *self, Similarity *similarity,
                   Matcher *required_matcher, Matcher *optional_matcher) {
    VArray *children = VA_new(2);
    VA_Push(children, INCREF(required_matcher));
    VA_Push(children, INCREF(optional_matcher));
    PolyMatcher_init((PolyMatcher*)self, children, similarity);
    RequiredOptionalMatcherIVARS *const ivars = ReqOptMatcher_IVARS(self);

    // Assign.
    ivars->req_matcher = (Matcher*)INCREF(required_matcher);
    ivars->opt_matcher = (Matcher*)INCREF(optional_matcher);

    // Init.
    ivars->opt_matcher_first_time = true;

    DECREF(children);
    return self;
}
// Return a new VArray containing the elements for which the `test`
// callback returns true.  NULL slots that pass are preserved as NULL.
VArray*
VA_Gather_IMP(VArray *self, VA_Gather_Test_t test, void *data) {
    VArray *gathered = VA_new(self->size);
    for (uint32_t i = 0, max = self->size; i < max; i++) {
        if (test(self, i, data)) {
            Obj *elem = self->elems[i];
            VA_Push(gathered, elem ? INCREF(elem) : NULL);
        }
    }
    return gathered;
}
// Append a run (pushed without an INCREF, so a refcount is consumed) and
// resize the per-run slice bookkeeping arrays to match.
void
SortEx_Add_Run_IMP(SortExternal *self, SortExternal *run) {
    SortExternalIVARS *const ivars = SortEx_IVARS(self);
    VA_Push(ivars->runs, (Obj*)run);
    uint32_t num_runs = VA_Get_Size(ivars->runs);
    ivars->slice_sizes
        = (uint32_t*)REALLOCATE(ivars->slice_sizes,
                                num_runs * sizeof(uint32_t));
    ivars->slice_starts
        = (Obj***)REALLOCATE(ivars->slice_starts, num_runs * sizeof(Obj**));
}
// Return a new VArray holding one new reference to each key in the hash.
VArray*
Hash_Keys_IMP(Hash *self) {
    Obj *key;
    Obj *val;
    VArray *keys = VA_new(self->size);
    Hash_Iterate(self);
    while (Hash_Next(self, &key, &val)) {
        VA_Push(keys, INCREF(key));
    }
    return keys;
}
// CaseFolder should lowercase plain text.
static void
test_analysis(TestBatchRunner *runner) {
    CaseFolder *case_folder = CaseFolder_new();
    String     *source      = Str_newf("caPiTal ofFensE");
    VArray     *expected    = VA_new(1);
    VA_Push(expected, (Obj*)Str_newf("capital offense"));
    TestUtils_test_analyzer(runner, (Analyzer*)case_folder, source, expected,
                            "lowercase plain text");
    DECREF(expected);
    DECREF(source);
    DECREF(case_folder);
}
// %f64 should match what printf's %g produces for the same value.
static void
vcatf_f64(VArray *tests) {
    char     buf[64];
    float    num    = 1.3f;
    CharBuf *target = S_get_cb("foo ");
    CharBuf *expected;
    sprintf(buf, "foo bar %g baz", num);
    expected = CB_new_from_trusted_utf8(buf, strlen(buf));
    CB_catf(target, "bar %f64 baz", num);
    VA_Push(tests, (Obj*)TestCB_new(expected, target));
}
// Return a new VArray holding a new reference to every element still
// sitting in the buffer (from buf_tick up to buf_max).
VArray*
BBSortEx_Peek_Cache_IMP(BBSortEx *self) {
    BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
    uint32_t count  = ivars->buf_max - ivars->buf_tick;
    Obj    **buffer = ivars->buffer;
    VArray  *retval = VA_new(count);
    for (uint32_t i = ivars->buf_tick; i < ivars->buf_max; ++i) {
        VA_Push(retval, INCREF(buffer[i]));
    }
    return retval;
}
// Return the field number for `field`, registering the field with the
// next available number if it hasn't been seen before.
int32_t
Seg_add_field(Segment *self, const CharBuf *field) {
    Integer32 *num = (Integer32*)Hash_Fetch(self->by_name, (Obj*)field);
    if (num) {
        // Already registered: reuse the existing number.
        return Int32_Get_Value(num);
    }
    // New field: record it in both the name->num and num->name maps.
    int32_t field_num = VA_Get_Size(self->by_num);
    Hash_Store(self->by_name, (Obj*)field, (Obj*)Int32_new(field_num));
    VA_Push(self->by_num, (Obj*)CB_Clone(field));
    return field_num;
}
// Initialize from an explicit analyzer chain, or build the default chain
// (case folder, tokenizer, stemmer) for `language`.  Throws if neither
// argument is supplied.
PolyAnalyzer*
PolyAnalyzer_init(PolyAnalyzer *self, const CharBuf *language,
                  VArray *analyzers) {
    Analyzer_init((Analyzer*)self);
    if (analyzers) {
        // Verify that every element is an Analyzer, then share the array.
        for (uint32_t i = 0, max = VA_Get_Size(analyzers); i < max; i++) {
            CERTIFY(VA_Fetch(analyzers, i), ANALYZER);
        }
        self->analyzers = (VArray*)INCREF(analyzers);
    }
    else if (language) {
        self->analyzers = VA_new(3);
        VA_Push(self->analyzers, (Obj*)CaseFolder_new());
        VA_Push(self->analyzers, (Obj*)RegexTokenizer_new(NULL));
        VA_Push(self->analyzers, (Obj*)SnowStemmer_new(language));
    }
    else {
        THROW(ERR, "Must specify either 'language' or 'analyzers'");
    }
    return self;
}