/* Scan the index folder for existing "snapshot_*.json" files and build a
 * filename whose generation number is one higher than any found.  Caller
 * takes ownership of the returned CharBuf. */
CharBuf*
IxManager_make_snapshot_filename(IndexManager *self) {
    VArray  *file_list = Folder_List(self->folder);
    u32_t    tick, num_entries;
    i32_t    highest_gen = 0;
    CharBuf *base_36;
    CharBuf *retval;

    /* Track the highest generation among snapshot files. */
    for (tick = 0, num_entries = VA_Get_Size(file_list);
         tick < num_entries;
         tick++
    ) {
        CharBuf *entry = (CharBuf*)VA_Fetch(file_list, tick);
        if (!CB_Starts_With_Str(entry, "snapshot_", 9)) { continue; }
        if (!CB_Ends_With_Str(entry, ".json", 5))       { continue; }
        {
            i32_t gen = IxFileNames_extract_gen(entry);
            if (gen > highest_gen) { highest_gen = gen; }
        }
    }
    DECREF(file_list);

    /* Encode (highest + 1) as base 36 and splice it into the filename. */
    base_36 = StrHelp_to_base36(highest_gen + 1);
    retval  = CB_newf("snapshot_%o.json", base_36);
    DECREF(base_36);
    return retval;
}
/* Return the name of the snapshot file with the highest generation number,
 * or NULL if the folder holds no "snapshot_*.json" entries.  Caller takes
 * ownership of the returned String.  Rethrows if the folder can't be
 * opened for listing. */
String*
IxFileNames_latest_snapshot(Folder *folder) {
    DirHandle *dh       = Folder_Open_Dir(folder, NULL);
    String    *best     = NULL;
    uint64_t   best_gen = 0;

    if (dh == NULL) {
        RETHROW(INCREF(Err_get_error()));
    }

    while (DH_Next(dh)) {
        String *entry = DH_Get_Entry(dh);
        if (Str_Starts_With_Utf8(entry, "snapshot_", 9)
            && Str_Ends_With_Utf8(entry, ".json", 5)
           ) {
            uint64_t gen = IxFileNames_extract_gen(entry);
            if (gen > best_gen) {
                /* New champion: replace any previously cloned name. */
                best_gen = gen;
                DECREF(best);
                best = Str_Clone(entry);
            }
        }
        DECREF(entry);
    }

    DECREF(dh);
    return best;
}
/* Build the filename for the next snapshot: scan the index folder for
 * existing "snapshot_*.json" files and use a generation one higher than the
 * maximum found, encoded in base 36.  Caller takes ownership of the
 * returned String.  Rethrows if the folder can't be opened for listing. */
String*
IxManager_Make_Snapshot_Filename_IMP(IndexManager *self) {
    IndexManagerIVARS *const ivars = IxManager_IVARS(self);
    Folder *folder = (Folder*)CERTIFY(ivars->folder, FOLDER);
    DirHandle *dh = Folder_Open_Dir(folder, NULL);
    uint64_t max_gen = 0;

    if (!dh) { RETHROW(INCREF(Err_get_error())); }

    // Find the highest generation among existing snapshot files.
    while (DH_Next(dh)) {
        String *entry = DH_Get_Entry(dh);
        if (Str_Starts_With_Utf8(entry, "snapshot_", 9)
            && Str_Ends_With_Utf8(entry, ".json", 5)
           ) {
            uint64_t gen = IxFileNames_extract_gen(entry);
            if (gen > max_gen) { max_gen = gen; }
        }
        DECREF(entry);
    }
    DECREF(dh);

    uint64_t new_gen = max_gen + 1;
    char base36[StrHelp_MAX_BASE36_BYTES];
    StrHelp_to_base36(new_gen, &base36);
    // Fixed: pass `base36` (decays to char*) rather than `&base36`, whose
    // type is char(*)[N] -- a type mismatch for the %s-style vararg.
    return Str_newf("snapshot_%s.json", base36);
}
/* Create a Segment numbered one greater than the highest segment number
 * recorded in the supplied Snapshot.  Caller takes ownership of the
 * returned Segment. */
Segment*
IxManager_make_new_segment(IndexManager *self, Snapshot *snapshot) {
    VArray  *entries  = Snapshot_List(snapshot);
    CharBuf *name_buf = CB_new(20);
    Segment *new_seg;
    i32_t    top_seg_num = 0;
    u32_t    tick, num_entries;

    /* Determine the highest segment number currently in the snapshot. */
    for (tick = 0, num_entries = VA_Get_Size(entries);
         tick < num_entries;
         tick++
    ) {
        CharBuf *entry = (CharBuf*)VA_Fetch(entries, tick);
        if (CB_Starts_With_Str(entry, "seg_", 4)) {
            i32_t seg_num = IxFileNames_extract_gen(entry);
            if (seg_num > top_seg_num) { top_seg_num = seg_num; }
        }
    }

    /* Name the new segment with the next number in sequence. */
    S_cat_seg_name(name_buf, top_seg_num + 1);
    new_seg = Seg_new(name_buf, self->folder);

    DECREF(name_buf);
    DECREF(entries);
    return new_seg;
}
/* DirHandle-based variant: build the filename for the next snapshot, with a
 * generation one greater than any "snapshot_*.json" already in the folder.
 * Caller takes ownership of the returned CharBuf. */
CharBuf*
IxManager_make_snapshot_filename(IndexManager *self) {
    Folder *folder = (Folder*)CERTIFY(self->folder, FOLDER);
    DirHandle *dh = Folder_Open_Dir(folder, NULL);
    CharBuf *entry;
    uint64_t max_gen = 0;

    if (!dh) { RETHROW(INCREF(Err_get_error())); }

    // NOTE(review): `entry` is fetched once, before the loop -- this relies
    // on DH_Get_Entry() returning a buffer that DH_Next() updates in place
    // (no DECREF here, consistent with a borrowed reference).  Confirm
    // against the DirHandle contract for this API generation.
    entry = DH_Get_Entry(dh);
    while (DH_Next(dh)) {
        if (CB_Starts_With_Str(entry, "snapshot_", 9)
            && CB_Ends_With_Str(entry, ".json", 5)
           ) {
            uint64_t gen = IxFileNames_extract_gen(entry);
            if (gen > max_gen) { max_gen = gen; }
        }
    }
    DECREF(dh);

    {
        uint64_t new_gen = max_gen + 1;
        char base36[StrHelp_MAX_BASE36_BYTES];
        StrHelp_to_base36(new_gen, &base36);
        // Fixed: pass `base36` (decays to char*) rather than `&base36`,
        // whose type is char(*)[N] -- a type mismatch for the %s vararg.
        return CB_newf("snapshot_%s.json", base36);
    }
}
/* Test helper: assert that IxFileNames_extract_gen() pulls `gen` out of the
 * filename `name`, reporting the result under `test_name`. */
static void
S_test_extract_gen(TestBatchRunner *runner, const char *name, uint64_t gen,
                   const char *test_name) {
    size_t       name_len = strlen(name);
    StackString *wrapped  = SSTR_WRAP_UTF8(name, name_len);
    uint64_t     got      = IxFileNames_extract_gen((String*)wrapped);
    TEST_TRUE(runner, got == gen, test_name);
}
/* Prepare a commit: optionally merge existing segments, finish the new
 * segment, write a fresh schema file when content changed, and write a
 * temporary snapshot file ("<snapfile>.temp").  Sets `needs_commit` only
 * when there is something to commit; the actual commit happens elsewhere.
 * Throws if called without the write lock or called twice. */
void
Indexer_prepare_commit(Indexer *self) {
    VArray *seg_readers      = PolyReader_Get_Seg_Readers(self->polyreader);
    uint32_t num_seg_readers = VA_Get_Size(seg_readers);
    bool_t merge_happened    = false;

    if (!self->write_lock || self->prepared) {
        THROW(ERR, "Can't call Prepare_Commit() more than once");
    }

    // Merge existing index data.
    if (num_seg_readers) {
        merge_happened = S_maybe_merge(self, seg_readers);
    }

    // Add a new segment and write a new snapshot file if...
    if (Seg_Get_Count(self->segment)             // Docs/segs added.
        || merge_happened                        // Some segs merged.
        || !Snapshot_Num_Entries(self->snapshot) // Initializing index.
        || DelWriter_Updated(self->del_writer)
       ) {
        Folder   *folder   = self->folder;
        Schema   *schema   = self->schema;
        Snapshot *snapshot = self->snapshot;
        CharBuf  *old_schema_name = S_find_schema_file(snapshot);
        // Bump the schema generation past the old schema file's, or start
        // at 1 for a brand new index.
        uint64_t schema_gen = old_schema_name
                              ? IxFileNames_extract_gen(old_schema_name) + 1
                              : 1;
        char base36[StrHelp_MAX_BASE36_BYTES];
        CharBuf *new_schema_name;
        StrHelp_to_base36(schema_gen, &base36);
        new_schema_name = CB_newf("schema_%s.json", base36);

        // Finish the segment, write schema file.
        SegWriter_Finish(self->seg_writer);
        Schema_Write(schema, folder, new_schema_name);
        // Swap the old schema entry for the new one in the snapshot.
        if (old_schema_name) {
            Snapshot_Delete_Entry(snapshot, old_schema_name);
        }
        Snapshot_Add_Entry(snapshot, new_schema_name);
        DECREF(new_schema_name);

        // Write temporary snapshot file.  The ".temp" suffix keeps readers
        // from treating it as a live snapshot before the commit completes.
        DECREF(self->snapfile);
        self->snapfile = IxManager_Make_Snapshot_Filename(self->manager);
        CB_Cat_Trusted_Str(self->snapfile, ".temp", 5);
        Folder_Delete(folder, self->snapfile);
        Snapshot_Write_File(snapshot, folder, self->snapfile);

        self->needs_commit = true;
    }

    // Close reader, so that we can delete its files if appropriate.
    PolyReader_Close(self->polyreader);

    self->prepared = true;
}
/* Modern IVARS-style Prepare_Commit: optionally merge existing segments,
 * finish the new segment, write a schema file, and write a temporary
 * snapshot file ("<snapfile>.temp").  Sets `needs_commit` only when there
 * is something to commit.  Throws if called without the write lock or
 * called twice. */
void
Indexer_Prepare_Commit_IMP(Indexer *self) {
    IndexerIVARS *const ivars = Indexer_IVARS(self);
    Vector *seg_readers = PolyReader_Get_Seg_Readers(ivars->polyreader);
    size_t num_seg_readers = Vec_Get_Size(seg_readers);
    bool merge_happened = false;

    if (!ivars->write_lock || ivars->prepared) {
        THROW(ERR, "Can't call Prepare_Commit() more than once");
    }

    // Merge existing index data.
    if (num_seg_readers) {
        merge_happened = S_maybe_merge(self, seg_readers);
    }

    // Add a new segment and write a new snapshot file if...
    if (Seg_Get_Count(ivars->segment)             // Docs/segs added.
        || merge_happened                         // Some segs merged.
        || !Snapshot_Num_Entries(ivars->snapshot) // Initializing index.
        || DelWriter_Updated(ivars->del_writer)
       ) {
        Folder   *folder   = ivars->folder;
        Schema   *schema   = ivars->schema;
        Snapshot *snapshot = ivars->snapshot;

        // Derive snapshot and schema file names.  The ".temp" suffix keeps
        // readers from treating the snapshot as live before commit.
        DECREF(ivars->snapfile);
        String *snapfile = IxManager_Make_Snapshot_Filename(ivars->manager);
        ivars->snapfile = Str_Cat_Trusted_Utf8(snapfile, ".temp", 5);
        DECREF(snapfile);
        // The schema file shares the snapshot file's generation number.
        uint64_t schema_gen = IxFileNames_extract_gen(ivars->snapfile);
        char base36[StrHelp_MAX_BASE36_BYTES];
        StrHelp_to_base36(schema_gen, &base36);
        String *new_schema_name = Str_newf("schema_%s.json", base36);

        // Finish the segment, write schema file.
        SegWriter_Finish(ivars->seg_writer);
        Schema_Write(schema, folder, new_schema_name);
        // Replace any older schema entry in the snapshot with the new one.
        String *old_schema_name = S_find_schema_file(snapshot);
        if (old_schema_name) {
            Snapshot_Delete_Entry(snapshot, old_schema_name);
        }
        Snapshot_Add_Entry(snapshot, new_schema_name);
        DECREF(new_schema_name);

        // Write temporary snapshot file.
        Folder_Delete(folder, ivars->snapfile);
        Snapshot_Write_File(snapshot, folder, ivars->snapfile);

        ivars->needs_commit = true;
    }

    // Close reader, so that we can delete its files if appropriate.
    PolyReader_Close(ivars->polyreader);

    ivars->prepared = true;
}
/* Report the largest segment number among the files listed in `snapshot`,
 * or 0 if no valid segment names are present.  `self` is unused. */
int64_t
IxManager_Highest_Seg_Num_IMP(IndexManager *self, Snapshot *snapshot) {
    Vector  *entries = Snapshot_List(snapshot);
    size_t   count   = Vec_Get_Size(entries);
    uint64_t best    = 0;
    UNUSED_VAR(self);

    for (size_t tick = 0; tick < count; tick++) {
        String *entry = (String*)Vec_Fetch(entries, tick);
        if (!Seg_valid_seg_name(entry)) { continue; }
        uint64_t seg_num = IxFileNames_extract_gen(entry);
        if (seg_num > best) { best = seg_num; }
    }

    DECREF(entries);
    return (int64_t)best;
}
/* Older VArray-based variant: report the largest segment number among the
 * files listed in `snapshot`, or 0 if none qualify.  `self` is unused. */
int64_t
IxManager_highest_seg_num(IndexManager *self, Snapshot *snapshot) {
    VArray  *entries = Snapshot_List(snapshot);
    uint32_t tick, num_entries;
    uint64_t best = 0;
    UNUSED_VAR(self);

    for (tick = 0, num_entries = VA_Get_Size(entries);
         tick < num_entries;
         tick++
    ) {
        CharBuf *entry = (CharBuf*)VA_Fetch(entries, tick);
        if (!Seg_valid_seg_name(entry)) { continue; }
        {
            uint64_t seg_num = IxFileNames_extract_gen(entry);
            if (seg_num > best) { best = seg_num; }
        }
    }

    DECREF(entries);
    return (int64_t)best;
}
/* Serialize this snapshot's entry list to JSON at `path`.  If `path` is
 * NULL or empty, derive a name one generation past the latest snapshot file
 * in `folder`.  Refuses to overwrite an existing file. */
void
Snapshot_Write_File_IMP(Snapshot *self, Folder *folder, String *path) {
    SnapshotIVARS *const ivars = Snapshot_IVARS(self);
    Hash   *all_data = Hash_new(0);
    Vector *list     = Snapshot_List(self);

    // Update path.
    DECREF(ivars->path);
    if (path != NULL && Str_Get_Size(path) != 0) {
        ivars->path = Str_Clone(path);
    }
    else {
        // No path supplied: bump the latest on-disk generation (or start
        // at 1 for a brand new index).
        String *latest = IxFileNames_latest_snapshot(folder);
        uint64_t gen = latest ? IxFileNames_extract_gen(latest) + 1 : 1;
        char base36[StrHelp_MAX_BASE36_BYTES];
        StrHelp_to_base36(gen, &base36);
        // Fixed: pass `base36` (decays to char*) rather than `&base36`,
        // whose type is char(*)[N] -- a type mismatch for the %s vararg.
        ivars->path = Str_newf("snapshot_%s.json", base36);
        DECREF(latest);
    }

    // Don't overwrite.
    if (Folder_Exists(folder, ivars->path)) {
        THROW(ERR, "Snapshot file '%o' already exists", ivars->path);
    }

    // Sort, then store file names.
    Vec_Sort(list);
    Hash_Store_Utf8(all_data, "entries", 7, (Obj*)list);

    // Create a JSON-izable data structure.
    Hash_Store_Utf8(all_data, "format", 6,
                    (Obj*)Str_newf("%i32",
                                   (int32_t)Snapshot_current_file_format));
    Hash_Store_Utf8(all_data, "subformat", 9,
                    (Obj*)Str_newf("%i32",
                                   (int32_t)Snapshot_current_file_subformat));

    // Write out JSON-ized data to the new file.
    Json_spew_json((Obj*)all_data, folder, ivars->path);
    DECREF(all_data);
}
/* Middle-era variant: serialize this snapshot's entry list to JSON at
 * `path`.  If `path` is NULL, derive a name one generation past the latest
 * snapshot file in `folder`.  Refuses to overwrite an existing file. */
void
Snapshot_write_file(Snapshot *self, Folder *folder, const CharBuf *path) {
    Hash   *all_data = Hash_new(0);
    VArray *list     = Snapshot_List(self);

    // Update path.
    DECREF(self->path);
    if (path) {
        self->path = CB_Clone(path);
    }
    else {
        // No path supplied: bump the latest on-disk generation (or start
        // at 1 for a brand new index).
        CharBuf *latest = IxFileNames_latest_snapshot(folder);
        uint64_t gen = latest ? IxFileNames_extract_gen(latest) + 1 : 1;
        char base36[StrHelp_MAX_BASE36_BYTES];
        StrHelp_to_base36(gen, &base36);
        // Fixed: pass `base36` (decays to char*) rather than `&base36`,
        // whose type is char(*)[N] -- a type mismatch for the %s vararg.
        self->path = CB_newf("snapshot_%s.json", base36);
        DECREF(latest);
    }

    // Don't overwrite.
    if (Folder_Exists(folder, self->path)) {
        THROW(ERR, "Snapshot file '%o' already exists", self->path);
    }

    // Sort, then store file names.
    VA_Sort(list, NULL, NULL);
    Hash_Store_Str(all_data, "entries", 7, (Obj*)list);

    // Create a JSON-izable data structure.
    Hash_Store_Str(all_data, "format", 6,
                   (Obj*)CB_newf("%i32",
                                 (int32_t)Snapshot_current_file_format));
    Hash_Store_Str(all_data, "subformat", 9,
                   (Obj*)CB_newf("%i32",
                                 (int32_t)Snapshot_current_file_subformat));

    // Write out JSON-ized data to the new file.
    Json_spew_json((Obj*)all_data, folder, self->path);
    DECREF(all_data);
}
/* Oldest-era variant: serialize this snapshot's entry list to JSON at
 * `filename`.  If `filename` is NULL, derive a name one generation past the
 * latest snapshot file in `folder`.  Refuses to overwrite an existing file.
 * NOTE(review): unlike the newer variants, this one writes no "subformat"
 * key -- presumably pre-dating it; confirm against the file-format docs for
 * this API generation. */
void
Snapshot_write_file(Snapshot *self, Folder *folder, const CharBuf *filename) {
    Hash   *all_data = Hash_new(0);
    VArray *list     = Snapshot_List(self);

    /* Update filename. */
    DECREF(self->filename);
    if (filename) {
        self->filename = CB_Clone(filename);
    }
    else {
        /* No filename supplied: bump the latest on-disk generation (or
         * start at 1 for a brand new index). */
        CharBuf *latest = IxFileNames_latest_snapshot(folder);
        i32_t gen = latest ? IxFileNames_extract_gen(latest) + 1 : 1;
        CharBuf *base_36 = StrHelp_to_base36(gen);
        self->filename = CB_newf("snapshot_%o.json", base_36);
        DECREF(latest);
        DECREF(base_36);
    }

    /* Don't overwrite. */
    if (Folder_Exists(folder, self->filename)) {
        THROW("Snapshot file '%o' already exists", self->filename);
    }

    /* Sort, then store file names. */
    VA_Sort(list, NULL);
    Hash_Store_Str(all_data, "entries", 7, (Obj*)list);

    /* Create a JSON-izable data structure. */
    Hash_Store_Str(all_data, "format", 6,
                   (Obj*)CB_newf("%i32",
                                 (i32_t)Snapshot_current_file_format));

    /* Write out JSON-ized data to the new file. */
    Json_spew_json((Obj*)all_data, folder, self->filename);
    DECREF(all_data);
}
/* Open a PolyReader against `index`, retrying if a concurrent Indexer
 * rewrites the snapshot out from under us.  If `snapshot` is supplied, its
 * path is used; otherwise the most recent "snapshot_*.json" is chosen.
 * When `manager` is supplied, a deletion lock is held across the open and a
 * read lock is taken on the chosen snapshot file.  The retry loop keys off
 * `last_gen`: a failure is only retried when a newer-generation snapshot
 * has appeared, otherwise the trapped error is rethrown. */
PolyReader*
PolyReader_do_open(PolyReader *self, Obj *index, Snapshot *snapshot,
                   IndexManager *manager) {
    PolyReaderIVARS *const ivars = PolyReader_IVARS(self);
    Folder *folder = S_derive_folder(index);
    uint64_t last_gen = 0;  // generation of the last snapshot we tried

    PolyReader_init(self, NULL, folder, snapshot, manager, NULL);
    DECREF(folder);

    if (manager) {
        if (!S_obtain_deletion_lock(self)) {
            DECREF(self);
            THROW(LOCKERR, "Couldn't get deletion lock");
        }
    }

    while (1) {
        CharBuf *target_snap_file;

        // If a Snapshot was supplied, use its file.
        if (snapshot) {
            target_snap_file = Snapshot_Get_Path(snapshot);
            if (!target_snap_file) {
                THROW(ERR, "Supplied snapshot objects must not be empty");
            }
            else {
                CB_Inc_RefCount(target_snap_file);
            }
        }
        else {
            // Otherwise, pick the most recent snap file.
            target_snap_file = IxFileNames_latest_snapshot(folder);

            // No snap file?  Looks like the index is empty.  We can stop
            // now and return NULL.
            // NOTE(review): the comment above says "return NULL", but this
            // `break` falls through to `return self` below -- confirm which
            // is intended.
            if (!target_snap_file) {
                break;
            }
        }

        // Derive "generation" of this snapshot file from its name.
        uint64_t gen = IxFileNames_extract_gen(target_snap_file);

        // Get a read lock on the most recent snapshot file if indicated.
        if (manager) {
            if (!S_obtain_read_lock(self, target_snap_file)) {
                DECREF(self);
                THROW(LOCKERR, "Couldn't get read lock for %o",
                      target_snap_file);
            }
        }

        // Testing only: exercise the race-condition path by renaming a
        // "temp" entry when the debug hook is armed.
        if (PolyReader_race_condition_debug1) {
            ZombieCharBuf *temp = ZCB_WRAP_STR("temp", 4);
            if (Folder_Exists(folder, (CharBuf*)temp)) {
                bool success
                    = Folder_Rename(folder, (CharBuf*)temp,
                                    PolyReader_race_condition_debug1);
                if (!success) { RETHROW(INCREF(Err_get_error())); }
            }
            PolyReader_debug1_num_passes++;
        }

        // If a Snapshot object was passed in, the file has already been
        // read.  If that's not the case, we must read the file we just
        // picked.
        if (!snapshot) {
            struct try_read_snapshot_context context;
            context.snapshot = ivars->snapshot;
            context.folder   = folder;
            context.path     = target_snap_file;
            Err *error = Err_trap(S_try_read_snapshot, &context);

            if (error) {
                S_release_read_lock(self);
                DECREF(target_snap_file);
                if (last_gen < gen) { // Index updated, so try again.
                    DECREF(error);
                    last_gen = gen;
                    continue;
                }
                else { // Real error.
                    if (manager) { S_release_deletion_lock(self); }
                    RETHROW(error);
                }
            }
        }

        /* It's possible, though unlikely, for an Indexer to delete files
         * out from underneath us after the snapshot file is read but before
         * we've got SegReaders holding open all the required files.  If we
         * failed to open something, see if we can find a newer snapshot
         * file.  If we can, then the exception was due to the race
         * condition.  If not, we have a real exception, so throw an error.
         */
        struct try_open_elements_context context;
        context.self        = self;
        context.seg_readers = NULL;
        Err *error = Err_trap(S_try_open_elements, &context);
        if (error) {
            S_release_read_lock(self);
            DECREF(target_snap_file);
            if (last_gen < gen) { // Index updated, so try again.
                DECREF(error);
                last_gen = gen;
            }
            else { // Real error.
                if (manager) { S_release_deletion_lock(self); }
                RETHROW(error);
            }
        }
        else { // Succeeded.
            S_init_sub_readers(self, (VArray*)context.seg_readers);
            DECREF(context.seg_readers);
            DECREF(target_snap_file);
            break;
        }
    }

    if (manager) { S_release_deletion_lock(self); }

    return self;
}
/* Attempt to open every index element named in the current snapshot: locate
 * and parse the highest-generation schema file, build a Segment per valid
 * "seg_*" entry, then open a SegReader for each segment.  On success,
 * `args->seg_readers` holds the new readers; on any failure an error is
 * thrown (the caller traps it and may retry against a newer snapshot). */
void
S_try_open_elements(void *context) {
    struct try_open_elements_context *args
        = (struct try_open_elements_context*)context;
    PolyReader *self = args->self;
    PolyReaderIVARS *const ivars = PolyReader_IVARS(self);
    VArray   *files  = Snapshot_List(ivars->snapshot);
    Folder   *folder = PolyReader_Get_Folder(self);
    uint32_t  num_segs = 0;
    uint64_t  latest_schema_gen = 0;
    CharBuf  *schema_file = NULL;

    // Find schema file, count segments.
    for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
        CharBuf *entry = (CharBuf*)VA_Fetch(files, i);
        if (Seg_valid_seg_name(entry)) {
            num_segs++;
        }
        else if (CB_Starts_With_Str(entry, "schema_", 7)
                 && CB_Ends_With_Str(entry, ".json", 5)
                ) {
            // Keep only the schema file with the highest generation.
            uint64_t gen = IxFileNames_extract_gen(entry);
            if (gen > latest_schema_gen) {
                latest_schema_gen = gen;
                if (!schema_file) { schema_file = CB_Clone(entry); }
                else              { CB_Mimic(schema_file, (Obj*)entry); }
            }
        }
    }

    // Read Schema.
    if (!schema_file) {
        DECREF(files);
        THROW(ERR, "Can't find a schema file.");
    }
    else {
        Hash *dump = (Hash*)Json_slurp_json(folder, schema_file);
        if (dump) { // read file successfully
            DECREF(ivars->schema);
            ivars->schema = (Schema*)CERTIFY(
                                VTable_Load_Obj(SCHEMA, (Obj*)dump),
                                SCHEMA);
            DECREF(dump);
            DECREF(schema_file);
            schema_file = NULL;
        }
        else {
            // Parse failure: clean up and throw (caller may retry).
            CharBuf *mess = MAKE_MESS("Failed to parse %o", schema_file);
            DECREF(schema_file);
            DECREF(files);
            Err_throw_mess(ERR, mess);
        }
    }

    VArray *segments = VA_new(num_segs);
    for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
        CharBuf *entry = (CharBuf*)VA_Fetch(files, i);

        // Create a Segment for each segmeta.
        if (Seg_valid_seg_name(entry)) {
            int64_t seg_num = IxFileNames_extract_gen(entry);
            Segment *segment = Seg_new(seg_num);

            // Bail if reading the file fails (probably because it's been
            // deleted and a new snapshot file has been written so we need
            // to retry).
            if (Seg_Read_File(segment, folder)) {
                VA_Push(segments, (Obj*)segment);
            }
            else {
                CharBuf *mess = MAKE_MESS("Failed to read %o", entry);
                DECREF(segment);
                DECREF(segments);
                DECREF(files);
                Err_throw_mess(ERR, mess);
            }
        }
    }

    // Sort the segments by age.
    VA_Sort(segments, NULL, NULL);

    // Open individual SegReaders.
    struct try_open_segreader_context seg_context;
    seg_context.schema   = PolyReader_Get_Schema(self);
    seg_context.folder   = folder;
    seg_context.snapshot = PolyReader_Get_Snapshot(self);
    seg_context.segments = segments;
    seg_context.result   = NULL;
    args->seg_readers = VA_new(num_segs);
    Err *error = NULL;
    for (uint32_t seg_tick = 0; seg_tick < num_segs; seg_tick++) {
        seg_context.seg_tick = seg_tick;
        error = Err_trap(S_try_open_segreader, &seg_context);
        if (error) { break; }
        VA_Push(args->seg_readers, (Obj*)seg_context.result);
        seg_context.result = NULL;
    }

    DECREF(segments);
    DECREF(files);
    if (error) {
        // Signal failure to the caller by nulling out the readers before
        // rethrowing the trapped error.
        DECREF(args->seg_readers);
        args->seg_readers = NULL;
        RETHROW(error);
    }
}
/* Prepare a commit for the background merger: merge segments if the merge
 * policy elects any, finish the consolidated segment, grab the write lock,
 * and write a temporary snapshot file.  If another process updated the
 * index while the merge ran, re-apply deletions against the merged-away
 * segments and fold the newer segments into the snapshot before rewriting
 * it.  Sets `needs_commit` only when a merge actually happened.  Throws if
 * called twice. */
void
BGMerger_Prepare_Commit_IMP(BackgroundMerger *self) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);
    Vector *seg_readers = PolyReader_Get_Seg_Readers(ivars->polyreader);
    uint32_t num_seg_readers = Vec_Get_Size(seg_readers);
    uint32_t segs_merged = 0;

    if (ivars->prepared) {
        THROW(ERR, "Can't call Prepare_Commit() more than once");
    }

    // Maybe merge existing index data.
    if (num_seg_readers) {
        segs_merged = S_maybe_merge(self);
    }

    if (!segs_merged) {
        // Nothing merged.  Leave `needs_commit` false and bail out.
        ivars->prepared = true;
        return;
    }
    // Finish the segment and write a new snapshot file.
    else {
        Folder   *folder   = ivars->folder;
        Snapshot *snapshot = ivars->snapshot;

        // Write out new deletions.
        if (DelWriter_Updated(ivars->del_writer)) {
            // Only write out if they haven't all been applied.
            if (segs_merged != num_seg_readers) {
                DelWriter_Finish(ivars->del_writer);
            }
        }

        // Finish the segment.
        SegWriter_Finish(ivars->seg_writer);

        // Grab the write lock.
        S_obtain_write_lock(self);
        if (!ivars->write_lock) {
            RETHROW(INCREF(Err_get_error()));
        }

        // Write temporary snapshot file.  The ".temp" suffix keeps readers
        // from treating it as a live snapshot before commit.
        DECREF(ivars->snapfile);
        String *snapfile = IxManager_Make_Snapshot_Filename(ivars->manager);
        ivars->snapfile = Str_Cat_Trusted_Utf8(snapfile, ".temp", 5);
        DECREF(snapfile);
        Folder_Delete(folder, ivars->snapfile);
        Snapshot_Write_File(snapshot, folder, ivars->snapfile);

        // Determine whether the index has been updated while this
        // background merge process was running, by comparing the snapshot
        // path we started from against the latest one on disk.
        String *start_snapfile
            = Snapshot_Get_Path(PolyReader_Get_Snapshot(ivars->polyreader));
        Snapshot *latest_snapshot
            = Snapshot_Read_File(Snapshot_new(), ivars->folder, NULL);
        String *latest_snapfile = Snapshot_Get_Path(latest_snapshot);
        bool index_updated
            = !Str_Equals(start_snapfile, (Obj*)latest_snapfile);

        if (index_updated) {
            /* See if new deletions have been applied since this background
             * merge process started against any of the segments we just
             * merged away.  If that's true, we need to write another
             * segment which applies the deletions against the new
             * composite segment. */
            S_merge_updated_deletions(self);

            // Add the fresh content to our snapshot.  (It's important to
            // run this AFTER S_merge_updated_deletions, because otherwise
            // we couldn't tell whether the deletion counts changed.)
            Vector *files = Snapshot_List(latest_snapshot);
            for (uint32_t i = 0, max = Vec_Get_Size(files); i < max; i++) {
                String *file = (String*)Vec_Fetch(files, i);
                if (Str_Starts_With_Utf8(file, "seg_", 4)) {
                    // Only segments newer than our merge cutoff count as
                    // fresh content.
                    int64_t gen = (int64_t)IxFileNames_extract_gen(file);
                    if (gen > ivars->cutoff) {
                        Snapshot_Add_Entry(ivars->snapshot, file);
                    }
                }
            }
            DECREF(files);

            // Since the snapshot content has changed, we need to rewrite
            // it.
            Folder_Delete(folder, ivars->snapfile);
            Snapshot_Write_File(snapshot, folder, ivars->snapfile);
        }
        DECREF(latest_snapshot);

        ivars->needs_commit = true;
    }

    // Close reader, so that we can delete its files if appropriate.
    PolyReader_Close(ivars->polyreader);

    ivars->prepared = true;
}