/* Round-trip check: write a one-entry Snapshot to a RAM folder, read it back
 * into a second Snapshot, and confirm that Read_File() returns the object it
 * was invoked on and that both Snapshots list the same entries. */
static void
test_Read_File_and_Write_File(TestBatchRunner *runner) {
    Snapshot *original  = Snapshot_new();
    Folder   *ram_dir   = (Folder*)RAMFolder_new(NULL);
    String   *snap_path = (String*)SSTR_WRAP_UTF8("snap", 4);
    String   *foo_entry = (String*)SSTR_WRAP_UTF8("foo", 3);

    // Persist a snapshot holding a single entry.
    Snapshot_Add_Entry(original, foo_entry);
    Snapshot_Write_File(original, ram_dir, snap_path);

    // Load it into a fresh object.
    Snapshot *copy     = Snapshot_new();
    Snapshot *returned = Snapshot_Read_File(copy, ram_dir, snap_path);
    TEST_TRUE(runner, copy == returned, "Read_File() returns the object");

    // Compare the entry lists of the written and the re-read snapshot.
    Vector *written_entries = Snapshot_List(original);
    Vector *loaded_entries  = Snapshot_List(copy);
    TEST_TRUE(runner,
              Vec_Equals(written_entries, (Obj*)loaded_entries),
              "Round trip through Write_File() and Read_File()");

    DECREF(written_entries);
    DECREF(loaded_entries);
    DECREF(copy);
    DECREF(original);
    DECREF(ram_dir);
}
/* Exercise Add_Entry(), Num_Entries(), List() and Delete_Entry(): adding the
 * same entry twice must count once, a second distinct entry raises the count
 * to two, and deleting the first brings it back to one. */
static void
test_Add_and_Delete(TestBatchRunner *runner) {
    Snapshot *snap      = Snapshot_new();
    String   *foo_entry = (String*)SSTR_WRAP_UTF8("foo", 3);
    String   *bar_entry = (String*)SSTR_WRAP_UTF8("bar", 3);

    // Add "foo" twice; the second add is deliberately redundant.
    for (int pass = 0; pass < 2; pass++) {
        Snapshot_Add_Entry(snap, foo_entry);
    }

    Vector *listed = Snapshot_List(snap);
    TEST_INT_EQ(runner, Snapshot_Num_Entries(snap), 1, "One entry added");
    TEST_TRUE(runner,
              Str_Equals(foo_entry, Vec_Fetch(listed, 0)),
              "correct entry");
    DECREF(listed);

    Snapshot_Add_Entry(snap, bar_entry);
    TEST_INT_EQ(runner, Snapshot_Num_Entries(snap), 2, "second entry added");

    Snapshot_Delete_Entry(snap, foo_entry);
    TEST_INT_EQ(runner, Snapshot_Num_Entries(snap), 1, "Delete_Entry");

    DECREF(snap);
}
/* Check that Write_File() and Read_File() each record the path they were
 * given (observable through Get_Path()), and that Set_Path() overrides it. */
static void
test_path_handling(TestBatchRunner *runner) {
    Snapshot *snap_obj     = Snapshot_new();
    Folder   *ram_dir      = (Folder*)RAMFolder_new(NULL);
    String   *first_path   = (String*)SSTR_WRAP_UTF8("snap", 4);
    String   *renamed_path = (String*)SSTR_WRAP_UTF8("crackle", 7);
    String   *reported;

    // Writing stores the target path on the Snapshot.
    Snapshot_Write_File(snap_obj, ram_dir, first_path);
    reported = Snapshot_Get_Path(snap_obj);
    TEST_TRUE(runner, Str_Equals(first_path, (Obj*)reported),
              "Write_File() sets path as a side effect");

    // Reading from the renamed file stores the new path.
    Folder_Rename(ram_dir, first_path, renamed_path);
    Snapshot_Read_File(snap_obj, ram_dir, renamed_path);
    reported = Snapshot_Get_Path(snap_obj);
    TEST_TRUE(runner, Str_Equals(renamed_path, (Obj*)reported),
              "Read_File() sets path as a side effect");

    // An explicit Set_Path() wins.
    Snapshot_Set_Path(snap_obj, first_path);
    reported = Snapshot_Get_Path(snap_obj);
    TEST_TRUE(runner, Str_Equals(first_path, (Obj*)reported),
              "Set_Path()");

    DECREF(ram_dir);
    DECREF(snap_obj);
}
/* Walk the top level of the purger's folder looking at "snapshot_*.json"
 * files other than the purger's own snapshot file, and sort the files they
 * reference into "spared" and "candidate for deletion".
 *
 * Files referenced by the purger's own snapshot (plus that snapshot's file)
 * are always spared.  For every other snapshot file, a snapshot read lock
 * object is created but never obtained: if the lock is held, the snapshot
 * file and everything it references are spared; otherwise its referenced
 * files become candidates and the Snapshot object itself is collected.
 *
 * @param self           The FilePurger.
 * @param purgables_ptr  Receives the keys of the candidate hash after all
 *                       spared files have been removed from it.
 * @param snapshots_ptr  Receives the array of Snapshots whose lock was not
 *                       held.
 *
 * Rethrows the current error if the folder cannot be opened as a directory.
 */
static void
S_discover_unused(FilePurger *self, VArray **purgables_ptr,
                  VArray **snapshots_ptr) {
    Folder    *folder = self->folder;
    DirHandle *dh     = Folder_Open_Dir(folder, NULL);
    if (!dh) {
        RETHROW(INCREF(Err_get_error()));
    }
    VArray  *spared       = VA_new(1);
    VArray  *snapshots    = VA_new(1);
    CharBuf *current_path = NULL;

    // Seed the spared list with everything the current snapshot references.
    if (self->snapshot) {
        VArray *current_entries = Snapshot_List(self->snapshot);
        VArray *current_refs
            = S_find_all_referenced(folder, current_entries);
        VA_Push_VArray(spared, current_refs);
        DECREF(current_entries);
        DECREF(current_refs);
        current_path = Snapshot_Get_Path(self->snapshot);
        if (current_path) {
            VA_Push(spared, INCREF(current_path));
        }
    }

    // The entry object is fetched once and inspected after each DH_Next().
    CharBuf *entry      = DH_Get_Entry(dh);
    Hash    *candidates = Hash_new(64);
    while (DH_Next(dh)) {
        // Only consider other snapshot files.
        if (!CB_Starts_With_Str(entry, "snapshot_", 9)
            || !CB_Ends_With_Str(entry, ".json", 5)
            || (current_path && CB_Equals(entry, (Obj*)current_path))
           ) {
            continue;
        }

        Snapshot *snapshot
            = Snapshot_Read_File(Snapshot_new(), folder, entry);
        Lock *lock
            = IxManager_Make_Snapshot_Read_Lock(self->manager, entry);
        VArray *snap_list  = Snapshot_List(snapshot);
        VArray *referenced = S_find_all_referenced(folder, snap_list);

        // The lock is never obtained here -- we only check whether some
        // other entity is holding it.
        if (lock) {
            Lock_Clear_Stale(lock);
        }

        if (!lock || !Lock_Is_Locked(lock)) {
            // Nobody is using this snapshot: every file it references may
            // be deleted, and the snapshot itself is handed back.
            uint32_t num_refs = VA_Get_Size(referenced);
            for (uint32_t tick = 0; tick < num_refs; tick++) {
                Hash_Store(candidates, VA_Fetch(referenced, tick),
                           INCREF(&EMPTY));
            }
            VA_Push(snapshots, INCREF(snapshot));
        }
        else {
            // The snapshot file is locked, so that version of the index is
            // in use -- protect the file and everything it references.
            VA_Grow(spared,
                    VA_Get_Size(spared) + VA_Get_Size(referenced) + 1);
            VA_Push(spared, (Obj*)CB_Clone(entry));
            VA_Push_VArray(spared, referenced);
        }

        DECREF(referenced);
        DECREF(snap_list);
        DECREF(snapshot);
        DECREF(lock);
    }
    DECREF(dh);

    // Clean up after a dead segment consolidation.
    S_zap_dead_merge(self, candidates);

    // Anything spared must not be purged: strike it from the candidates.
    uint32_t num_spared = VA_Get_Size(spared);
    for (uint32_t tick = 0; tick < num_spared; tick++) {
        DECREF(Hash_Delete(candidates, VA_Fetch(spared, tick)));
    }

    // Hand back the purgable file names and the unused Snapshots.
    *purgables_ptr = Hash_Keys(candidates);
    *snapshots_ptr = snapshots;

    DECREF(candidates);
    DECREF(spared);
}
Indexer* Indexer_init(Indexer *self, Schema *schema, Obj *index, IndexManager *manager, int32_t flags) { bool_t create = (flags & Indexer_CREATE) ? true : false; bool_t truncate = (flags & Indexer_TRUNCATE) ? true : false; Folder *folder = S_init_folder(index, create); Lock *write_lock; CharBuf *latest_snapfile; Snapshot *latest_snapshot = Snapshot_new(); // Init. self->stock_doc = Doc_new(NULL, 0); self->truncate = false; self->optimize = false; self->prepared = false; self->needs_commit = false; self->snapfile = NULL; self->merge_lock = NULL; // Assign. self->folder = folder; self->manager = manager ? (IndexManager*)INCREF(manager) : IxManager_new(NULL, NULL); IxManager_Set_Folder(self->manager, folder); // Get a write lock for this folder. write_lock = IxManager_Make_Write_Lock(self->manager); Lock_Clear_Stale(write_lock); if (Lock_Obtain(write_lock)) { // Only assign if successful, otherwise DESTROY unlocks -- bad! self->write_lock = write_lock; } else { DECREF(write_lock); DECREF(self); RETHROW(INCREF(Err_get_error())); } // Find the latest snapshot or create a new one. latest_snapfile = IxFileNames_latest_snapshot(folder); if (latest_snapfile) { Snapshot_Read_File(latest_snapshot, folder, latest_snapfile); } // Look for an existing Schema if one wasn't supplied. if (schema) { self->schema = (Schema*)INCREF(schema); } else { if (!latest_snapfile) { THROW(ERR, "No Schema supplied, and can't find one in the index"); } else { CharBuf *schema_file = S_find_schema_file(latest_snapshot); Hash *dump = (Hash*)Json_slurp_json(folder, schema_file); if (dump) { // read file successfully self->schema = (Schema*)CERTIFY( VTable_Load_Obj(SCHEMA, (Obj*)dump), SCHEMA); schema = self->schema; DECREF(dump); schema_file = NULL; } else { THROW(ERR, "Failed to parse %o", schema_file); } } } // If we're clobbering, start with an empty Snapshot and an empty // PolyReader. Otherwise, start with the most recent Snapshot and an // up-to-date PolyReader. 
if (truncate) { self->snapshot = Snapshot_new(); self->polyreader = PolyReader_new(schema, folder, NULL, NULL, NULL); self->truncate = true; } else { // TODO: clone most recent snapshot rather than read it twice. self->snapshot = (Snapshot*)INCREF(latest_snapshot); self->polyreader = latest_snapfile ? PolyReader_open((Obj*)folder, NULL, NULL) : PolyReader_new(schema, folder, NULL, NULL, NULL); if (latest_snapfile) { // Make sure than any existing fields which may have been // dynamically added during past indexing sessions get added. Schema *old_schema = PolyReader_Get_Schema(self->polyreader); Schema_Eat(schema, old_schema); } } // Zap detritus from previous sessions. { // Note: we have to feed FilePurger with the most recent snapshot file // now, but with the Indexer's snapshot later. FilePurger *file_purger = FilePurger_new(folder, latest_snapshot, self->manager); FilePurger_Purge(file_purger); DECREF(file_purger); } // Create a new segment. { int64_t new_seg_num = IxManager_Highest_Seg_Num(self->manager, latest_snapshot) + 1; Lock *merge_lock = IxManager_Make_Merge_Lock(self->manager); uint32_t i, max; if (Lock_Is_Locked(merge_lock)) { // If there's a background merge process going on, stay out of its // way. Hash *merge_data = IxManager_Read_Merge_Data(self->manager); Obj *cutoff_obj = merge_data ? Hash_Fetch_Str(merge_data, "cutoff", 6) : NULL; if (!cutoff_obj) { DECREF(merge_lock); DECREF(merge_data); THROW(ERR, "Background merge detected, but can't read merge data"); } else { int64_t cutoff = Obj_To_I64(cutoff_obj); if (cutoff >= new_seg_num) { new_seg_num = cutoff + 1; } } DECREF(merge_data); } self->segment = Seg_new(new_seg_num); // Add all known fields to Segment. { VArray *fields = Schema_All_Fields(schema); for (i = 0, max = VA_Get_Size(fields); i < max; i++) { Seg_Add_Field(self->segment, (CharBuf*)VA_Fetch(fields, i)); } DECREF(fields); } DECREF(merge_lock); } // Create new SegWriter and FilePurger. 
self->file_purger = FilePurger_new(folder, self->snapshot, self->manager); self->seg_writer = SegWriter_new(self->schema, self->snapshot, self->segment, self->polyreader); SegWriter_Prep_Seg_Dir(self->seg_writer); // Grab a local ref to the DeletionsWriter. self->del_writer = (DeletionsWriter*)INCREF( SegWriter_Get_Del_Writer(self->seg_writer)); DECREF(latest_snapfile); DECREF(latest_snapshot); return self; }
/* Initialize a BackgroundMerger.
 *
 * Takes the write lock and then the merge lock, reads the latest snapshot,
 * and -- if the snapshot has a path -- purges stale files, opens a
 * PolyReader, clones its schema, reserves a new segment number by writing
 * the merge data file, and creates the SegWriter.  The write lock is
 * released before returning; the merge lock is kept unless the index has no
 * snapshot, in which case both locks are released and the function returns
 * early.
 *
 * @param self     The BackgroundMerger being initialized.
 * @param index    Passed to S_init_folder() to obtain the Folder.
 * @param manager  IndexManager to use.  If NULL, a new one is created and
 *                 its write lock timeout is set to 10000.
 * @return self.
 *
 * Releases `self` and rethrows the current error if either lock cannot be
 * obtained.
 */
BackgroundMerger*
BGMerger_init(BackgroundMerger *self, Obj *index, IndexManager *manager) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);
    Folder *folder = S_init_folder(index);

    // Init.
    ivars->optimize     = false;
    ivars->prepared     = false;
    ivars->needs_commit = false;
    ivars->snapfile     = NULL;
    ivars->doc_maps     = Hash_new(0);

    // Assign.
    ivars->folder = folder;
    if (!manager) {
        ivars->manager = IxManager_new(NULL, NULL);
        IxManager_Set_Write_Lock_Timeout(ivars->manager, 10000);
    }
    else {
        ivars->manager = (IndexManager*)INCREF(manager);
    }
    IxManager_Set_Folder(ivars->manager, folder);

    // Take the write lock (held only briefly), then the merge lock.
    S_obtain_write_lock(self);
    if (!ivars->write_lock) {
        DECREF(self);
        RETHROW(INCREF(Err_get_error()));
    }
    S_obtain_merge_lock(self);
    if (!ivars->merge_lock) {
        DECREF(self);
        RETHROW(INCREF(Err_get_error()));
    }

    // Load the latest snapshot.  No path means no index content: give both
    // locks back and stop here.
    ivars->snapshot = Snapshot_Read_File(Snapshot_new(), folder, NULL);
    if (!Snapshot_Get_Path(ivars->snapshot)) {
        S_release_write_lock(self);
        S_release_merge_lock(self);
        return self;
    }

    // Create the FilePurger and clear out leftovers from earlier sessions.
    ivars->file_purger
        = FilePurger_new(folder, ivars->snapshot, ivars->manager);
    FilePurger_Purge(ivars->file_purger);

    // Open a PolyReader, passing in the IndexManager so we get a read lock
    // on the Snapshot's files -- so that Indexers don't zap our files while
    // we're operating in the background.
    ivars->polyreader = PolyReader_open((Obj*)folder, NULL, ivars->manager);

    // Clone the reader's schema by dumping and reloading it.
    Schema *reader_schema = PolyReader_Get_Schema(ivars->polyreader);
    Obj    *schema_dump   = (Obj*)Schema_Dump(reader_schema);
    ivars->schema = (Schema*)CERTIFY(Freezer_load(schema_dump), SCHEMA);
    DECREF(schema_dump);

    // Create the new Segment and register every schema field with it.
    int64_t seg_num
        = IxManager_Highest_Seg_Num(ivars->manager, ivars->snapshot) + 1;
    Vector *field_names = Schema_All_Fields(ivars->schema);
    ivars->segment = Seg_new(seg_num);
    uint32_t num_fields = Vec_Get_Size(field_names);
    for (uint32_t tick = 0; tick < num_fields; tick++) {
        String *field = (String*)Vec_Fetch(field_names, tick);
        Seg_Add_Field(ivars->segment, field);
    }
    DECREF(field_names);

    // Our "cutoff" is the segment this BackgroundMerger will write.  Now
    // that it is known, write the merge data file.
    ivars->cutoff = Seg_Get_Number(ivars->segment);
    IxManager_Write_Merge_Data(ivars->manager, ivars->cutoff);

    /* Create the SegWriter but hold off on preparing the new segment
     * directory -- because if we don't need to merge any segments we don't
     * need it.  (We've reserved the dir by plopping down the merge.json
     * file.) */
    ivars->seg_writer = SegWriter_new(ivars->schema, ivars->snapshot,
                                      ivars->segment, ivars->polyreader);

    // Keep our own reference to the SegWriter's DeletionsWriter.
    DeletionsWriter *seg_del_writer
        = SegWriter_Get_Del_Writer(ivars->seg_writer);
    ivars->del_writer = (DeletionsWriter*)INCREF(seg_del_writer);

    // Release the write lock.  Now new Indexers can start while we work in
    // the background.
    S_release_write_lock(self);

    return self;
}
/* First phase of a two-phase commit for a BackgroundMerger.
 *
 * Merges existing segments if S_maybe_merge() decides to.  When something
 * was merged, finishes the new segment, re-obtains the write lock, writes a
 * temporary snapshot file (".temp" suffix), and -- if the index's latest
 * snapshot differs from the one the merger's PolyReader started with --
 * carries forward new deletions and adds newer "seg_" entries before
 * rewriting the temporary snapshot.  Sets `needs_commit` only when a merge
 * actually happened; always sets `prepared` on normal return.
 *
 * The merger may have no PolyReader at all: BGMerger_init() returns before
 * opening one when the index has no snapshot.  That case is treated as
 * "nothing to merge" instead of dispatching a method on a NULL reader.
 * NOTE(review): this relies on ivars->polyreader being NULL when init bails
 * early (i.e. on zero-initialized ivars) -- confirm against the allocator.
 *
 * Throws if called more than once, and rethrows the current error if the
 * write lock cannot be obtained.
 */
void
BGMerger_Prepare_Commit_IMP(BackgroundMerger *self) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);
    // Guard against a merger that never opened a reader (empty index).
    Vector   *seg_readers     = ivars->polyreader
                                ? PolyReader_Get_Seg_Readers(ivars->polyreader)
                                : NULL;
    uint32_t  num_seg_readers = seg_readers ? Vec_Get_Size(seg_readers) : 0;
    uint32_t  segs_merged     = 0;

    if (ivars->prepared) {
        THROW(ERR, "Can't call Prepare_Commit() more than once");
    }

    // Maybe merge existing index data.
    if (num_seg_readers) {
        segs_merged = S_maybe_merge(self);
    }

    if (!segs_merged) {
        // Nothing merged.  Leave `needs_commit` false and bail out.
        ivars->prepared = true;
        return;
    }
    // Finish the segment and write a new snapshot file.
    else {
        Folder   *folder   = ivars->folder;
        Snapshot *snapshot = ivars->snapshot;

        // Write out new deletions.
        if (DelWriter_Updated(ivars->del_writer)) {
            // Only write out if they haven't all been applied.
            if (segs_merged != num_seg_readers) {
                DelWriter_Finish(ivars->del_writer);
            }
        }

        // Finish the segment.
        SegWriter_Finish(ivars->seg_writer);

        // Grab the write lock.
        S_obtain_write_lock(self);
        if (!ivars->write_lock) {
            RETHROW(INCREF(Err_get_error()));
        }

        // Write temporary snapshot file.
        DECREF(ivars->snapfile);
        String *snapfile = IxManager_Make_Snapshot_Filename(ivars->manager);
        ivars->snapfile = Str_Cat_Trusted_Utf8(snapfile, ".temp", 5);
        DECREF(snapfile);
        Folder_Delete(folder, ivars->snapfile);
        Snapshot_Write_File(snapshot, folder, ivars->snapfile);

        // Determine whether the index has been updated while this background
        // merge process was running.
        String *start_snapfile
            = Snapshot_Get_Path(PolyReader_Get_Snapshot(ivars->polyreader));
        Snapshot *latest_snapshot
            = Snapshot_Read_File(Snapshot_new(), ivars->folder, NULL);
        String *latest_snapfile = Snapshot_Get_Path(latest_snapshot);
        bool index_updated
            = !Str_Equals(start_snapfile, (Obj*)latest_snapfile);

        if (index_updated) {
            /* See if new deletions have been applied since this
             * background merge process started against any of the
             * segments we just merged away.  If that's true, we need to
             * write another segment which applies the deletions against
             * the new composite segment.
             */
            S_merge_updated_deletions(self);

            // Add the fresh content to our snapshot. (It's important to
            // run this AFTER S_merge_updated_deletions, because otherwise
            // we couldn't tell whether the deletion counts changed.)
            Vector *files = Snapshot_List(latest_snapshot);
            for (uint32_t i = 0, max = Vec_Get_Size(files); i < max; i++) {
                String *file = (String*)Vec_Fetch(files, i);
                if (Str_Starts_With_Utf8(file, "seg_", 4)) {
                    // Only segments newer than our cutoff are new content.
                    int64_t gen = (int64_t)IxFileNames_extract_gen(file);
                    if (gen > ivars->cutoff) {
                        Snapshot_Add_Entry(ivars->snapshot, file);
                    }
                }
            }
            DECREF(files);

            // Since the snapshot content has changed, we need to rewrite it.
            Folder_Delete(folder, ivars->snapfile);
            Snapshot_Write_File(snapshot, folder, ivars->snapfile);
        }

        DECREF(latest_snapshot);

        ivars->needs_commit = true;
    }

    // Close reader, so that we can delete its files if appropriate.
    PolyReader_Close(ivars->polyreader);

    ivars->prepared = true;
}
/* Carry forward deletions that were applied to merged-away segments while
 * the background merge was running.
 *
 * Opens a fresh PolyReader on the folder and, for every segment of the
 * merger's original reader that has an entry in `doc_maps`, compares the
 * deletion count then and now.  For each segment whose count changed, a
 * deletions iterator is collected.  If any were collected, a new segment is
 * written whose DeletionsWriter deletes the remapped doc ids (doc map value
 * plus the offset of the merged segment within a reader opened on the
 * merger's snapshot).
 *
 * @return true if a deletions segment was written, false if no merged-away
 *         segment had a changed deletion count.
 *
 * Throws "Failed sanity check" if the merged segment cannot be located in
 * the reader opened on the merger's snapshot.
 */
static bool
S_merge_updated_deletions(BackgroundMerger *self) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);
    Hash *updated_deletions = NULL;

    PolyReader *fresh_reader
        = PolyReader_open((Obj*)ivars->folder, NULL, NULL);
    Vector *fresh_seg_readers = PolyReader_Get_Seg_Readers(fresh_reader);
    Vector *start_seg_readers
        = PolyReader_Get_Seg_Readers(ivars->polyreader);
    Hash *fresh_by_name = Hash_new(Vec_Get_Size(fresh_seg_readers));

    // Index the fresh SegReaders by segment name.
    uint32_t num_fresh = Vec_Get_Size(fresh_seg_readers);
    for (uint32_t tick = 0; tick < num_fresh; tick++) {
        SegReader *fresh = (SegReader*)Vec_Fetch(fresh_seg_readers, tick);
        Hash_Store(fresh_by_name, SegReader_Get_Seg_Name(fresh),
                   INCREF(fresh));
    }

    uint32_t num_start = Vec_Get_Size(start_seg_readers);
    for (uint32_t tick = 0; tick < num_start; tick++) {
        SegReader *before = (SegReader*)Vec_Fetch(start_seg_readers, tick);
        String    *name   = SegReader_Get_Seg_Name(before);

        // Only segments that were merged away are of interest.
        if (!Hash_Fetch(ivars->doc_maps, name)) {
            continue;
        }

        SegReader *after
            = (SegReader*)CERTIFY(Hash_Fetch(fresh_by_name, name),
                                  SEGREADER);
        int32_t count_before = SegReader_Del_Count(before);
        int32_t count_after  = SegReader_Del_Count(after);

        // Same count: no new deletions were applied against this segment.
        if (count_before == count_after) {
            continue;
        }

        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Obtain(
                  after, Class_Get_Name(DELETIONSREADER));
        if (!updated_deletions) {
            updated_deletions = Hash_new(num_start);
        }
        Hash_Store(updated_deletions, name,
                   (Obj*)DelReader_Iterator(del_reader));
    }

    DECREF(fresh_reader);
    DECREF(fresh_by_name);

    if (!updated_deletions) {
        return false;
    }

    PolyReader *merge_polyreader
        = PolyReader_open((Obj*)ivars->folder, ivars->snapshot, NULL);
    Vector *merge_seg_readers
        = PolyReader_Get_Seg_Readers(merge_polyreader);
    Snapshot *latest_snapshot
        = Snapshot_Read_File(Snapshot_new(), ivars->folder, NULL);
    int64_t deletions_seg_num
        = IxManager_Highest_Seg_Num(ivars->manager, latest_snapshot) + 1;
    Segment   *deletions_seg = Seg_new(deletions_seg_num);
    SegWriter *seg_writer    = SegWriter_new(ivars->schema, ivars->snapshot,
                                             deletions_seg, merge_polyreader);
    DeletionsWriter *del_writer = SegWriter_Get_Del_Writer(seg_writer);
    int64_t merged_seg_num = Seg_Get_Number(ivars->segment);
    int32_t offset         = INT32_MAX;

    SegWriter_Prep_Seg_Dir(seg_writer);

    // Locate the segment we wrote during the merge and look up its offset.
    uint32_t num_merge_readers = Vec_Get_Size(merge_seg_readers);
    for (uint32_t tick = 0; tick < num_merge_readers; tick++) {
        SegReader *candidate
            = (SegReader*)Vec_Fetch(merge_seg_readers, tick);
        if (SegReader_Get_Seg_Num(candidate) == merged_seg_num) {
            I32Array *offsets = PolyReader_Offsets(merge_polyreader);
            offset = I32Arr_Get(offsets, tick);
            DECREF(offsets);
        }
    }
    if (offset == INT32_MAX) {
        THROW(ERR, "Failed sanity check");
    }

    HashIterator *iter = HashIter_new(updated_deletions);
    while (HashIter_Next(iter)) {
        String   *name      = HashIter_Get_Key(iter);
        Matcher  *deletions = (Matcher*)HashIter_Get_Value(iter);
        I32Array *doc_map
            = (I32Array*)CERTIFY(Hash_Fetch(ivars->doc_maps, name),
                                 I32ARRAY);

        for (int32_t del = Matcher_Next(deletions);
             del != 0;
             del = Matcher_Next(deletions)
            ) {
            // Look up where the deleted doc lives in the rewritten segment.
            // A zero mapping is skipped: the doc was already deleted when
            // we were merging.
            int32_t remapped = I32Arr_Get(doc_map, del);
            if (remapped) {
                // A new deletion: carry it forward by zapping the doc in
                // the rewritten segment.
                DelWriter_Delete_By_Doc_ID(del_writer, remapped + offset);
            }
        }
    }
    DECREF(iter);

    // Finish the segment and clean up.
    DelWriter_Finish(del_writer);
    SegWriter_Finish(seg_writer);
    DECREF(seg_writer);
    DECREF(deletions_seg);
    DECREF(latest_snapshot);
    DECREF(merge_polyreader);
    DECREF(updated_deletions);

    return true;
}