/* Exercise Hash_Keys(), Hash_Values(), the Hash_Iterate()/Hash_Next()
 * cursor and Hash_Find_Key() against a hash populated with 500 entries.
 * Each CharBuf is stored as both the key and the value of its entry, so
 * the sorted keys and the sorted values must both equal `expected`.
 */
static void
test_Keys_Values_Iter(TestBatch *batch) {
    Hash   *hash     = Hash_new(0); // trigger multiple rebuilds.
    VArray *expected = VA_new(100);
    VArray *keys;
    VArray *values;

    for (uint32_t i = 0; i < 500; i++) {
        CharBuf *cb = CB_newf("%u32", i);
        Hash_Store(hash, (Obj*)cb, (Obj*)cb);
        VA_Push(expected, INCREF(cb));
    }
    VA_Sort(expected, NULL, NULL);

    // Bulk accessors.
    keys   = Hash_Keys(hash);
    values = Hash_Values(hash);
    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "Keys");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "Values");
    VA_Clear(keys);
    VA_Clear(values);

    // Refill the (now empty) arrays by walking the iterator.
    {
        Obj *key;
        Obj *value;
        Hash_Iterate(hash);
        while (Hash_Next(hash, &key, &value)) {
            VA_Push(keys, INCREF(key));
            VA_Push(values, INCREF(value));
        }
    }

    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "Keys from Iter");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "Values from Iter");

    // Key lookup: one key that was stored, one that was not.
    {
        ZombieCharBuf *forty = ZCB_WRAP_STR("40", 2);
        ZombieCharBuf *nope  = ZCB_WRAP_STR("nope", 4);
        Obj *key = Hash_Find_Key(hash, (Obj*)forty, ZCB_Hash_Sum(forty));
        TEST_TRUE(batch, Obj_Equals(key, (Obj*)forty), "Find_Key");
        // Terminate the assignment with ';' -- it used to be chained to the
        // following TEST_TRUE through an accidental comma operator.
        key = Hash_Find_Key(hash, (Obj*)nope, ZCB_Hash_Sum(nope));
        TEST_TRUE(batch, key == NULL,
                  "Find_Key returns NULL for non-existent key");
    }

    DECREF(hash);
    DECREF(expected);
    DECREF(keys);
    DECREF(values);
}
/* Build the list of SegReaders that should be merged.
 *
 * Starts from a shallow copy of the reader's SegReader array.  When `all`
 * is true that copy is returned untouched.  Otherwise the copy is sorted
 * with S_compare_doc_count, a running document total is compared against
 * Math_fibonacci(tick + 5) at each position to find a cut-off index, and
 * VA_Splice() is applied from that index (presumably discarding the tail
 * -- confirm against VA_Splice's contract).
 *
 * `self` is unused.  The caller owns the returned array.
 */
VArray*
IxManager_segreaders_to_merge(IndexManager *self, PolyReader *reader,
                              bool_t all) {
    VArray *seg_readers = VA_Shallow_Copy(PolyReader_Get_Seg_Readers(reader));
    u32_t   reader_count;
    u32_t   doc_sum = 0;
    u32_t   cut     = 0;
    u32_t   tick;
    UNUSED_VAR(self);

    if (all) {
        return seg_readers;
    }

    reader_count = VA_Get_Size(seg_readers);

    /* Sort by ascending size in docs. */
    VA_Sort(seg_readers, S_compare_doc_count);

    /* Find sparsely populated segments. */
    for (tick = 0; tick < reader_count; tick++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(seg_readers, tick);
        doc_sum += SegReader_Doc_Count(seg_reader);
        if (doc_sum < Math_fibonacci(tick + 5)) {
            cut = tick + 1;
        }
    }

    VA_Splice(seg_readers, cut, reader_count);
    return seg_readers;
}
/* Reset the iteration cursor and make sure the entries are sorted.
 *
 * Sets `tick` to -1, sorts `entries` the first time through (tracked via
 * the `sorted` flag), and returns the number of entries.
 */
uint32_t
Inverter_Iterate_IMP(Inverter *self) {
    InverterIVARS *const ivars = Inverter_IVARS(self);

    ivars->tick = -1;

    // Sorting is done lazily and only once per `sorted` reset.
    if (!ivars->sorted) {
        VA_Sort(ivars->entries, NULL, NULL);
        ivars->sorted = true;
    }

    const uint32_t entry_count = VA_Get_Size(ivars->entries);
    return entry_count;
}
/* Initialize a HeatMap from a set of spans and a proximity window.
 *
 * Works on a sorted shallow copy of `spans` (the caller's array is not
 * sorted in place), appends the original spans to the proximity boosts
 * produced by HeatMap_Generate_Proximity_Boosts(), sorts the union and
 * stores the result of HeatMap_Flatten_Spans() in self->spans.
 *
 * Returns `self`.
 */
HeatMap*
HeatMap_init(HeatMap *self, VArray *spans, uint32_t window) {
    VArray *sorted_spans = VA_Shallow_Copy(spans);
    VArray *combined;

    self->spans  = NULL;
    self->window = window;

    VA_Sort(sorted_spans, NULL, NULL);

    // Boosts first, then the spans themselves, then re-sort the union.
    combined = HeatMap_Generate_Proximity_Boosts(self, sorted_spans);
    VA_Push_VArray(combined, sorted_spans);
    VA_Sort(combined, NULL, NULL);

    self->spans = HeatMap_Flatten_Spans(self, combined);

    DECREF(combined);
    DECREF(sorted_spans);

    return self;
}
/* Stress the Hash with 1000 distinct random strings.
 *
 * Each string is stored as both key and value, every entry is then
 * overwritten once, and finally the sorted output of Hash_Keys() and
 * Hash_Values() is compared against the sorted list of inserted strings.
 */
static void
test_stress(TestBatch *batch) {
    Hash   *hash     = Hash_new(0); // trigger multiple rebuilds.
    VArray *expected = VA_new(1000);

    for (uint32_t n = 0; n < 1000; n++) {
        CharBuf *candidate = NULL;
        // Keep drawing random strings until one is not yet in the hash.
        for (;;) {
            candidate = TestUtils_random_string(rand() % 1200);
            if (!Hash_Fetch(hash, (Obj*)candidate)) {
                break;
            }
            DECREF(candidate);
        }
        Hash_Store(hash, (Obj*)candidate, (Obj*)candidate);
        VA_Push(expected, INCREF(candidate));
    }

    VA_Sort(expected, NULL, NULL);

    // Overwrite for good measure.
    for (uint32_t n = 0; n < 1000; n++) {
        CharBuf *existing = (CharBuf*)VA_Fetch(expected, n);
        Hash_Store(hash, (Obj*)existing, INCREF(existing));
    }

    VArray *keys   = Hash_Keys(hash);
    VArray *values = Hash_Values(hash);
    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "stress Keys");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "stress Values");

    DECREF(keys);
    DECREF(values);
    DECREF(expected);
    DECREF(hash);
}
/* Choose which SegReaders should be recycled (merged away).
 *
 * Candidates are gathered from the reader's SegReaders via S_check_cutoff
 * with `cutoff` as context.  When `optimize` is set, all candidates are
 * returned.  Otherwise the candidates are sorted with S_compare_doc_count
 * and IxManager_Choose_Sparse() picks how many of the leading ones are
 * recycled unconditionally; each remaining candidate is recycled only when
 * deletions / doc_max is at least 0.1.
 *
 * The caller owns the returned array.
 */
VArray*
IxManager_recycle(IndexManager *self, PolyReader *reader,
                  DeletionsWriter *del_writer, int64_t cutoff,
                  bool_t optimize) {
    VArray *all_readers = PolyReader_Get_Seg_Readers(reader);
    VArray *candidates  = VA_Gather(all_readers, S_check_cutoff, &cutoff);
    VArray *recyclables = VA_new(VA_Get_Size(candidates));
    const uint32_t candidate_count = VA_Get_Size(candidates);

    if (optimize) {
        DECREF(recyclables);
        return candidates;
    }

    // Sort by ascending size in docs, choose sparsely populated segments.
    VA_Sort(candidates, S_compare_doc_count, NULL);
    int32_t *raw_counts
        = (int32_t*)MALLOCATE(candidate_count * sizeof(int32_t));
    for (uint32_t tick = 0; tick < candidate_count; tick++) {
        SegReader *seg_reader
            = (SegReader*)CERTIFY(VA_Fetch(candidates, tick), SEGREADER);
        raw_counts[tick] = SegReader_Doc_Count(seg_reader);
    }
    // The I32Array takes over the malloc'd buffer.
    I32Array *doc_counts = I32Arr_new_steal(raw_counts, candidate_count);
    const uint32_t threshold = IxManager_Choose_Sparse(self, doc_counts);
    DECREF(doc_counts);

    uint32_t cursor = 0;

    // Move SegReaders to be recycled.
    for (; cursor < threshold; cursor++) {
        Obj *moved = VA_Delete(candidates, cursor);
        VA_Store(recyclables, cursor, moved);
    }

    // Find segments where at least 10% of all docs have been deleted.
    for (cursor = threshold; cursor < candidate_count; cursor++) {
        SegReader *seg_reader = (SegReader*)VA_Delete(candidates, cursor);
        CharBuf   *seg_name   = SegReader_Get_Seg_Name(seg_reader);
        double     doc_max    = SegReader_Doc_Max(seg_reader);
        double     deletions  = DelWriter_Seg_Del_Count(del_writer, seg_name);
        double     ratio      = deletions / doc_max;

        if (ratio >= 0.1) {
            VA_Push(recyclables, (Obj*)seg_reader);
        }
        else {
            DECREF(seg_reader);
        }
    }

    DECREF(candidates);

    return recyclables;
}
/* Produce a highlighted excerpt for the configured field of `hit_doc`.
 *
 * Returns NULL when the field is missing or is not a String, a new empty
 * String when the field value is empty, and otherwise the result of
 * Highlighter_Highlight_Excerpt() applied to the raw excerpt chosen with
 * the help of a HeatMap built from the compiler's highlight spans.
 */
String*
Highlighter_Create_Excerpt_IMP(Highlighter *self, HitDoc *hit_doc) {
    HighlighterIVARS *const ivars = Highlighter_IVARS(self);
    String *field_val = (String*)HitDoc_Extract(hit_doc, ivars->field);

    // Missing field or non-String value: nothing to excerpt.
    if (!field_val || !Obj_Is_A((Obj*)field_val, STRING)) {
        DECREF(field_val);
        return NULL;
    }

    // Empty string yields empty string.
    if (!Str_Get_Size(field_val)) {
        String *empty = Str_new_from_trusted_utf8("", 0);
        DECREF(field_val);
        return empty;
    }

    DocVector *doc_vec
        = Searcher_Fetch_Doc_Vec(ivars->searcher,
                                 HitDoc_Get_Doc_ID(hit_doc));
    VArray *score_spans
        = Compiler_Highlight_Spans(ivars->compiler, ivars->searcher,
                                   doc_vec, ivars->field);
    // Fall back to an empty span list when the compiler supplies none.
    if (!score_spans) {
        score_spans = VA_new(0);
    }
    VA_Sort(score_spans, NULL, NULL);

    HeatMap *heat_map
        = HeatMap_new(score_spans, (ivars->excerpt_length * 2) / 3);

    int32_t top;
    String *raw_excerpt
        = Highlighter_Raw_Excerpt(self, field_val, &top, heat_map);
    String *result
        = Highlighter_Highlight_Excerpt(self, score_spans, raw_excerpt, top);

    DECREF(raw_excerpt);
    DECREF(heat_map);
    DECREF(score_spans);
    DECREF(doc_vec);
    DECREF(field_val);

    return result;
}
/* Write this Snapshot's sorted entry list to `folder` as a JSON file.
 *
 * @param self    The Snapshot.  Its `path` member is replaced.
 * @param folder  Destination folder.
 * @param path    File name to write to.  When NULL, a name of the form
 *                "snapshot_<base36 gen>.json" is generated, where gen is
 *                one more than the generation of the latest snapshot file
 *                in `folder` (or 1 when there is none).
 *
 * Throws if a file already exists at the chosen path; the temporaries
 * built so far are released before throwing.
 */
void
Snapshot_write_file(Snapshot *self, Folder *folder, const CharBuf *path) {
    Hash   *all_data = Hash_new(0);
    VArray *list     = Snapshot_List(self);

    // Update path.
    DECREF(self->path);
    if (path) {
        self->path = CB_Clone(path);
    }
    else {
        CharBuf *latest = IxFileNames_latest_snapshot(folder);
        uint64_t gen = latest ? IxFileNames_extract_gen(latest) + 1 : 1;
        char base36[StrHelp_MAX_BASE36_BYTES];
        StrHelp_to_base36(gen, &base36);
        // Pass the array itself (decays to char*) for the %s conversion
        // rather than a pointer-to-array.
        self->path = CB_newf("snapshot_%s.json", base36);
        DECREF(latest);
    }

    // Don't overwrite.
    if (Folder_Exists(folder, self->path)) {
        // `list` has not been handed to `all_data` yet, so both must be
        // released here or they leak when THROW unwinds.
        DECREF(list);
        DECREF(all_data);
        THROW(ERR, "Snapshot file '%o' already exists", self->path);
    }

    // Sort, then store file names.  The hash takes over the reference to
    // `list`, so it is released together with `all_data` below.
    VA_Sort(list, NULL, NULL);
    Hash_Store_Str(all_data, "entries", 7, (Obj*)list);

    // Create a JSON-izable data structure.
    Hash_Store_Str(all_data, "format", 6,
                   (Obj*)CB_newf("%i32", (int32_t)Snapshot_current_file_format));
    Hash_Store_Str(all_data, "subformat", 9,
                   (Obj*)CB_newf("%i32", (int32_t)Snapshot_current_file_subformat));

    // Write out JSON-ized data to the new file.
    Json_spew_json((Obj*)all_data, folder, self->path);

    DECREF(all_data);
}
/* Write this Snapshot's sorted entry list to `folder` as a JSON file.
 *
 * self      The Snapshot.  Its `filename` member is replaced.
 * folder    Destination folder.
 * filename  File name to write to.  When NULL, a name of the form
 *           "snapshot_<base36 gen>.json" is generated, where gen is one
 *           more than the generation of the latest snapshot file in
 *           `folder` (or 1 when there is none).
 *
 * Throws if a file already exists under the chosen name; the temporaries
 * built so far are released before throwing.
 */
void
Snapshot_write_file(Snapshot *self, Folder *folder, const CharBuf *filename) {
    Hash   *all_data = Hash_new(0);
    VArray *list     = Snapshot_List(self);

    /* Update filename. */
    DECREF(self->filename);
    if (filename) {
        self->filename = CB_Clone(filename);
    }
    else {
        CharBuf *latest  = IxFileNames_latest_snapshot(folder);
        i32_t    gen     = latest ? IxFileNames_extract_gen(latest) + 1 : 1;
        CharBuf *base_36 = StrHelp_to_base36(gen);
        self->filename = CB_newf("snapshot_%o.json", base_36);
        DECREF(latest);
        DECREF(base_36);
    }

    /* Don't overwrite. */
    if (Folder_Exists(folder, self->filename)) {
        /* `list` has not been handed to `all_data` yet, so both must be
         * released here or they leak when THROW unwinds. */
        DECREF(list);
        DECREF(all_data);
        THROW("Snapshot file '%o' already exists", self->filename);
    }

    /* Sort, then store file names.  The hash takes over the reference to
     * `list`, so it is released together with `all_data` below. */
    VA_Sort(list, NULL);
    Hash_Store_Str(all_data, "entries", 7, (Obj*)list);

    /* Create a JSON-izable data structure. */
    Hash_Store_Str(all_data, "format", 6,
                   (Obj*)CB_newf("%i32", (i32_t)Snapshot_current_file_format));

    /* Write out JSON-ized data to the new file. */
    Json_spew_json((Obj*)all_data, folder, self->filename);

    DECREF(all_data);
}
/* Check Folder_List() on a RAMFolder.
 *
 * Creates the directories foo, foo/bar and foo/bar/baz plus the files
 * boffo and banana, then verifies that listing the root yields exactly
 * three sorted entries (banana, boffo, foo) and that listing foo/bar
 * yields the single bare entry name baz.
 */
static void
test_List(TestBatch *batch) {
    Folder *folder = (Folder*)RAMFolder_new(NULL);

    Folder_MkDir(folder, &foo);
    Folder_MkDir(folder, &foo_bar);
    Folder_MkDir(folder, &foo_bar_baz);

    // Create two empty files; the handles are not needed afterwards.
    FileHandle *handle
        = Folder_Open_FileHandle(folder, &boffo, FH_CREATE | FH_WRITE_ONLY);
    DECREF(handle);
    handle = Folder_Open_FileHandle(folder, &banana,
                                    FH_CREATE | FH_WRITE_ONLY);
    DECREF(handle);

    // Top-level listing.
    VArray *listing = Folder_List(folder, NULL);
    VA_Sort(listing, NULL, NULL);
    TEST_INT_EQ(batch, VA_Get_Size(listing), 3, "List");

    CharBuf *name = (CharBuf*)DOWNCAST(VA_Fetch(listing, 0), CHARBUF);
    TEST_TRUE(batch, name && CB_Equals(name, (Obj*)&banana),
              "List first file");
    name = (CharBuf*)DOWNCAST(VA_Fetch(listing, 1), CHARBUF);
    TEST_TRUE(batch, name && CB_Equals(name, (Obj*)&boffo),
              "List second file");
    name = (CharBuf*)DOWNCAST(VA_Fetch(listing, 2), CHARBUF);
    TEST_TRUE(batch, name && CB_Equals(name, (Obj*)&foo), "List dir");
    DECREF(listing);

    // Subdirectory listing.
    listing = Folder_List(folder, &foo_bar);
    TEST_INT_EQ(batch, VA_Get_Size(listing), 1,
                "List subdirectory contents");
    name = (CharBuf*)DOWNCAST(VA_Fetch(listing, 0), CHARBUF);
    TEST_TRUE(batch, name && CB_Equals(name, (Obj*)&baz),
              "Just the filename");
    DECREF(listing);

    DECREF(folder);
}
void S_try_open_elements(void *context) { struct try_open_elements_context *args = (struct try_open_elements_context*)context; PolyReader *self = args->self; PolyReaderIVARS *const ivars = PolyReader_IVARS(self); VArray *files = Snapshot_List(ivars->snapshot); Folder *folder = PolyReader_Get_Folder(self); uint32_t num_segs = 0; uint64_t latest_schema_gen = 0; CharBuf *schema_file = NULL; // Find schema file, count segments. for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) { CharBuf *entry = (CharBuf*)VA_Fetch(files, i); if (Seg_valid_seg_name(entry)) { num_segs++; } else if (CB_Starts_With_Str(entry, "schema_", 7) && CB_Ends_With_Str(entry, ".json", 5) ) { uint64_t gen = IxFileNames_extract_gen(entry); if (gen > latest_schema_gen) { latest_schema_gen = gen; if (!schema_file) { schema_file = CB_Clone(entry); } else { CB_Mimic(schema_file, (Obj*)entry); } } } } // Read Schema. if (!schema_file) { DECREF(files); THROW(ERR, "Can't find a schema file."); } else { Hash *dump = (Hash*)Json_slurp_json(folder, schema_file); if (dump) { // read file successfully DECREF(ivars->schema); ivars->schema = (Schema*)CERTIFY( VTable_Load_Obj(SCHEMA, (Obj*)dump), SCHEMA); DECREF(dump); DECREF(schema_file); schema_file = NULL; } else { CharBuf *mess = MAKE_MESS("Failed to parse %o", schema_file); DECREF(schema_file); DECREF(files); Err_throw_mess(ERR, mess); } } VArray *segments = VA_new(num_segs); for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) { CharBuf *entry = (CharBuf*)VA_Fetch(files, i); // Create a Segment for each segmeta. if (Seg_valid_seg_name(entry)) { int64_t seg_num = IxFileNames_extract_gen(entry); Segment *segment = Seg_new(seg_num); // Bail if reading the file fails (probably because it's been // deleted and a new snapshot file has been written so we need to // retry). 
if (Seg_Read_File(segment, folder)) { VA_Push(segments, (Obj*)segment); } else { CharBuf *mess = MAKE_MESS("Failed to read %o", entry); DECREF(segment); DECREF(segments); DECREF(files); Err_throw_mess(ERR, mess); } } } // Sort the segments by age. VA_Sort(segments, NULL, NULL); // Open individual SegReaders. struct try_open_segreader_context seg_context; seg_context.schema = PolyReader_Get_Schema(self); seg_context.folder = folder; seg_context.snapshot = PolyReader_Get_Snapshot(self); seg_context.segments = segments; seg_context.result = NULL; args->seg_readers = VA_new(num_segs); Err *error = NULL; for (uint32_t seg_tick = 0; seg_tick < num_segs; seg_tick++) { seg_context.seg_tick = seg_tick; error = Err_trap(S_try_open_segreader, &seg_context); if (error) { break; } VA_Push(args->seg_readers, (Obj*)seg_context.result); seg_context.result = NULL; } DECREF(segments); DECREF(files); if (error) { DECREF(args->seg_readers); args->seg_readers = NULL; RETHROW(error); } }
/* Recursively append the JSON encoding of `dump` to `json`.
 *
 * @param dump   Value to encode.  NULL becomes `null`; CFISH_TRUE and
 *               CFISH_FALSE become `true`/`false`; CharBuf, IntNum and
 *               FloatNum become scalars; VArray and Hash become arrays and
 *               objects (hash keys are emitted in sorted order).
 * @param json   Output buffer, appended to.
 * @param depth  Current nesting level, used for indentation and for the
 *               MAX_DEPTH recursion guard.
 * @return true on success.  On failure (depth exceeded, or a hash key that
 *         is not a CharBuf) an error is recorded via Err_set_error() and
 *         false is returned; `json` may contain partial output.
 *
 * NOTE(review): an object of any other type matches no branch, so nothing
 * is appended and true is returned.
 */
static bool_t
S_to_json(Obj *dump, CharBuf *json, int32_t depth) {
    // Guard against infinite recursion in self-referencing data structures.
    if (depth > MAX_DEPTH) {
        CharBuf *mess = MAKE_MESS("Exceeded max depth of %i32", MAX_DEPTH);
        Err_set_error(Err_new(mess));
        return false;
    }

    if (!dump) {
        CB_Cat_Trusted_Str(json, "null", 4);
    }
    else if (dump == (Obj*)CFISH_TRUE) {
        CB_Cat_Trusted_Str(json, "true", 4);
    }
    else if (dump == (Obj*)CFISH_FALSE) {
        CB_Cat_Trusted_Str(json, "false", 5);
    }
    else if (Obj_Is_A(dump, CHARBUF)) {
        S_append_json_string(dump, json);
    }
    else if (Obj_Is_A(dump, INTNUM)) {
        CB_catf(json, "%i64", Obj_To_I64(dump));
    }
    else if (Obj_Is_A(dump, FLOATNUM)) {
        CB_catf(json, "%f64", Obj_To_F64(dump));
    }
    else if (Obj_Is_A(dump, VARRAY)) {
        VArray *array = (VArray*)dump;
        size_t  size  = VA_Get_Size(array);
        if (size == 0) {
            // Put empty array on single line.
            CB_Cat_Trusted_Str(json, "[]", 2);
            return true;
        }
        else if (size == 1) {
            Obj *elem = VA_Fetch(array, 0);
            // A NULL element is a scalar too (it is rendered as "null" by
            // the recursive call) and must be ruled out before Obj_Is_A()
            // is invoked on it.
            if (!elem || !(Obj_Is_A(elem, HASH) || Obj_Is_A(elem, VARRAY))) {
                // Put array containing single scalar element on one line.
                CB_Cat_Trusted_Str(json, "[", 1);
                if (!S_to_json(elem, json, depth + 1)) {
                    return false;
                }
                CB_Cat_Trusted_Str(json, "]", 1);
                return true;
            }
        }
        // Fall back to spreading elements across multiple lines.
        CB_Cat_Trusted_Str(json, "[", 1);
        for (size_t i = 0; i < size; i++) {
            CB_Cat_Trusted_Str(json, "\n", 1);
            S_cat_whitespace(json, depth + 1);
            if (!S_to_json(VA_Fetch(array, i), json, depth + 1)) {
                return false;
            }
            if (i + 1 < size) {
                CB_Cat_Trusted_Str(json, ",", 1);
            }
        }
        CB_Cat_Trusted_Str(json, "\n", 1);
        S_cat_whitespace(json, depth);
        CB_Cat_Trusted_Str(json, "]", 1);
    }
    else if (Obj_Is_A(dump, HASH)) {
        Hash   *hash = (Hash*)dump;
        size_t  size = Hash_Get_Size(hash);

        // Put empty hash on single line.
        if (size == 0) {
            CB_Cat_Trusted_Str(json, "{}", 2);
            return true;
        }

        // Validate that all keys are strings, then sort.
        VArray *keys = Hash_Keys(hash);
        for (size_t i = 0; i < size; i++) {
            Obj *key = VA_Fetch(keys, i);
            if (!key || !Obj_Is_A(key, CHARBUF)) {
                // Build the message while `keys` still holds the key, and
                // only then release the array.
                CharBuf *key_class = key ? Obj_Get_Class_Name(key) : NULL;
                CharBuf *mess = MAKE_MESS("Illegal key type: %o", key_class);
                DECREF(keys);
                Err_set_error(Err_new(mess));
                return false;
            }
        }
        VA_Sort(keys, NULL, NULL);

        // Spread pairs across multiple lines.
        CB_Cat_Trusted_Str(json, "{", 1);
        for (size_t i = 0; i < size; i++) {
            Obj *key = VA_Fetch(keys, i);
            CB_Cat_Trusted_Str(json, "\n", 1);
            S_cat_whitespace(json, depth + 1);
            S_append_json_string(key, json);
            CB_Cat_Trusted_Str(json, ": ", 2);
            if (!S_to_json(Hash_Fetch(hash, key), json, depth + 1)) {
                DECREF(keys);
                return false;
            }
            if (i + 1 < size) {
                CB_Cat_Trusted_Str(json, ",", 1);
            }
        }
        CB_Cat_Trusted_Str(json, "\n", 1);
        S_cat_whitespace(json, depth);
        CB_Cat_Trusted_Str(json, "}", 1);

        DECREF(keys);
    }

    return true;
}
/* Delete obsolete index files while holding the deletion lock.
 *
 * If the lock cannot be obtained a warning is issued and nothing is
 * deleted.  Otherwise every purgable entry that is not in
 * self->disallowed is deleted; entries which could not be deleted and
 * still exist are remembered as failures.  A snapshot file is then
 * removed only when none of the snapshot's entries is among the failures.
 */
void
FilePurger_purge(FilePurger *self) {
    Lock *deletion_lock = IxManager_Make_Deletion_Lock(self->manager);

    // Obtain deletion lock, purge files, release deletion lock.
    Lock_Clear_Stale(deletion_lock);
    if (!Lock_Obtain(deletion_lock)) {
        WARN("Can't obtain deletion lock, skipping deletion of "
             "obsolete files");
        DECREF(deletion_lock);
        return;
    }

    Folder *folder   = self->folder;
    Hash   *failures = Hash_new(0);
    VArray *purgables;
    VArray *snapshots;

    S_discover_unused(self, &purgables, &snapshots);

    // Attempt to delete entries -- if failure, no big deal, just try
    // again later.  Proceed in reverse lexical order so that directories
    // get deleted after they've been emptied.
    VA_Sort(purgables, NULL, NULL);
    uint32_t remaining = VA_Get_Size(purgables);
    while (remaining-- > 0) {
        CharBuf *entry = (CharBuf*)VA_fetch(purgables, remaining);
        if (Hash_Fetch(self->disallowed, (Obj*)entry)) {
            continue;
        }
        // Record a failure only when the entry is really still there.
        if (!Folder_Delete(folder, entry) && Folder_Exists(folder, entry)) {
            Hash_Store(failures, (Obj*)entry, INCREF(&EMPTY));
        }
    }

    const uint32_t snapshot_count = VA_Get_Size(snapshots);
    for (uint32_t tick = 0; tick < snapshot_count; tick++) {
        Snapshot *snapshot = (Snapshot*)VA_Fetch(snapshots, tick);
        bool_t    blocked  = false;

        if (Hash_Get_Size(failures)) {
            // Only delete snapshot files if all of their entries were
            // successfully deleted.
            VArray  *entries = Snapshot_List(snapshot);
            uint32_t left    = VA_Get_Size(entries);
            while (!blocked && left-- > 0) {
                CharBuf *entry = (CharBuf*)VA_Fetch(entries, left);
                if (Hash_Fetch(failures, (Obj*)entry)) {
                    blocked = true;
                }
            }
            DECREF(entries);
        }

        if (!blocked) {
            CharBuf *snapfile = Snapshot_Get_Path(snapshot);
            Folder_Delete(folder, snapfile);
        }
    }

    DECREF(failures);
    DECREF(purgables);
    DECREF(snapshots);
    Lock_Release(deletion_lock);

    DECREF(deletion_lock);
}