Snapshot*
Snapshot_Read_File_IMP(Snapshot *self, Folder *folder, String *path) {
    SnapshotIVARS *const ivars = Snapshot_IVARS(self);

    // Wipe any state left over from a previous read, then decide which
    // snapshot file to load: the caller's path if a non-empty one was
    // supplied, otherwise the newest snapshot file in the folder.
    S_zero_out(self);
    if (path != NULL && Str_Get_Size(path) > 0) {
        ivars->path = Str_Clone(path);
    }
    else {
        ivars->path = IxFileNames_latest_snapshot(folder);
    }

    // No snapshot file means an empty index -- nothing more to do.
    if (!ivars->path) {
        return self;
    }

    Hash *metadata = (Hash*)CERTIFY(Json_slurp_json(folder, ivars->path),
                                    HASH);
    Obj *format_obj = CERTIFY(Hash_Fetch_Utf8(metadata, "format", 6), OBJ);
    int32_t format = (int32_t)Json_obj_to_i64(format_obj);
    Obj *subformat_obj = Hash_Fetch_Utf8(metadata, "subformat", 9);
    int32_t subformat = subformat_obj
                        ? (int32_t)Json_obj_to_i64(subformat_obj)
                        : 0;

    // Refuse snapshots written by a newer version of the library.
    if (format > Snapshot_current_file_format) {
        THROW(ERR, "Snapshot format too recent: %i32, %i32", format,
              Snapshot_current_file_format);
    }

    // Take our own ref on the entries array, since older layouts
    // (format 1, or format 2 before subformat 1) get swapped out for a
    // cleaned copy.
    Vector *file_list = (Vector*)INCREF(CERTIFY(
        Hash_Fetch_Utf8(metadata, "entries", 7), VECTOR));
    if (format == 1 || (format == 2 && subformat < 1)) {
        Vector *upgraded = S_clean_segment_contents(file_list);
        DECREF(file_list);
        file_list = upgraded;
    }

    // Re-populate the entries hash from the list.
    Hash_Clear(ivars->entries);
    uint32_t num_entries = Vec_Get_Size(file_list);
    for (uint32_t tick = 0; tick < num_entries; tick++) {
        String *entry = (String*)CERTIFY(Vec_Fetch(file_list, tick), STRING);
        Hash_Store(ivars->entries, entry, (Obj*)CFISH_TRUE);
    }

    DECREF(file_list);
    DECREF(metadata);
    return self;
}
Snapshot*
Snapshot_read_file(Snapshot *self, Folder *folder, const CharBuf *path) {
    // Discard any state from a previous read, then choose a snapshot
    // file: the caller's path if given, else the newest one on disk.
    S_zero_out(self);
    self->path = path
                 ? CB_Clone(path)
                 : IxFileNames_latest_snapshot(folder);

    // No snapshot file means an empty index -- nothing more to do.
    if (!self->path) {
        return self;
    }

    Hash *metadata = (Hash*)CERTIFY(
        Json_slurp_json(folder, self->path), HASH);
    Obj *format_obj = CERTIFY(
        Hash_Fetch_Str(metadata, "format", 6), OBJ);
    int32_t format = (int32_t)Obj_To_I64(format_obj);
    Obj *subformat_obj = Hash_Fetch_Str(metadata, "subformat", 9);
    int32_t subformat = subformat_obj
                        ? (int32_t)Obj_To_I64(subformat_obj)
                        : 0;

    // Refuse snapshots written by a newer version of the library.
    if (format > Snapshot_current_file_format) {
        THROW(ERR, "Snapshot format too recent: %i32, %i32", format,
              Snapshot_current_file_format);
    }

    // Take our own ref on the entries array, since older layouts
    // (format 1, or format 2 before subformat 1) get swapped out for a
    // cleaned copy.
    VArray *file_list = (VArray*)CERTIFY(
        Hash_Fetch_Str(metadata, "entries", 7), VARRAY);
    INCREF(file_list);
    if (format == 1 || (format == 2 && subformat < 1)) {
        VArray *upgraded = S_clean_segment_contents(file_list);
        DECREF(file_list);
        file_list = upgraded;
    }

    // Re-populate the entries hash from the list.
    Hash_Clear(self->entries);
    uint32_t num_entries = VA_Get_Size(file_list);
    for (uint32_t tick = 0; tick < num_entries; tick++) {
        CharBuf *entry = (CharBuf*)CERTIFY(
            VA_Fetch(file_list, tick), CHARBUF);
        Hash_Store(self->entries, (Obj*)entry, INCREF(&EMPTY));
    }

    DECREF(file_list);
    DECREF(metadata);
    return self;
}
void
Snapshot_Write_File_IMP(Snapshot *self, Folder *folder, String *path) {
    SnapshotIVARS *const ivars = Snapshot_IVARS(self);
    Hash *dump = Hash_new(0);
    Vector *file_list = Snapshot_List(self);

    // Decide on the output path: the caller's choice if non-empty,
    // otherwise derive a fresh "snapshot_<gen>.json" name one generation
    // past the newest snapshot on disk.
    DECREF(ivars->path);
    if (path == NULL || Str_Get_Size(path) == 0) {
        String *latest = IxFileNames_latest_snapshot(folder);
        uint64_t gen = latest ? IxFileNames_extract_gen(latest) + 1 : 1;
        char base36[StrHelp_MAX_BASE36_BYTES];
        StrHelp_to_base36(gen, &base36);
        ivars->path = Str_newf("snapshot_%s.json", &base36);
        DECREF(latest);
    }
    else {
        ivars->path = Str_Clone(path);
    }

    // Never clobber an existing snapshot file.
    if (Folder_Exists(folder, ivars->path)) {
        THROW(ERR, "Snapshot file '%o' already exists", ivars->path);
    }

    // Assemble the JSON-izable structure: sorted file names plus format
    // and subformat version stamps.
    Vec_Sort(file_list);
    Hash_Store_Utf8(dump, "entries", 7, (Obj*)file_list);
    Hash_Store_Utf8(dump, "format", 6,
                    (Obj*)Str_newf("%i32",
                                   (int32_t)Snapshot_current_file_format));
    Hash_Store_Utf8(dump, "subformat", 9,
                    (Obj*)Str_newf("%i32",
                                   (int32_t)Snapshot_current_file_subformat));

    // Serialize to the new file.
    Json_spew_json((Obj*)dump, folder, ivars->path);
    DECREF(dump);
}
void
Snapshot_write_file(Snapshot *self, Folder *folder, const CharBuf *path) {
    Hash *dump = Hash_new(0);
    VArray *file_list = Snapshot_List(self);

    // Decide on the output path: the caller's choice, otherwise derive a
    // fresh "snapshot_<gen>.json" name one generation past the newest
    // snapshot on disk.
    DECREF(self->path);
    if (!path) {
        CharBuf *latest = IxFileNames_latest_snapshot(folder);
        uint64_t gen = latest ? IxFileNames_extract_gen(latest) + 1 : 1;
        char base36[StrHelp_MAX_BASE36_BYTES];
        StrHelp_to_base36(gen, &base36);
        self->path = CB_newf("snapshot_%s.json", &base36);
        DECREF(latest);
    }
    else {
        self->path = CB_Clone(path);
    }

    // Never clobber an existing snapshot file.
    if (Folder_Exists(folder, self->path)) {
        THROW(ERR, "Snapshot file '%o' already exists", self->path);
    }

    // Assemble the JSON-izable structure: sorted file names plus format
    // and subformat version stamps.
    VA_Sort(file_list, NULL, NULL);
    Hash_Store_Str(dump, "entries", 7, (Obj*)file_list);
    Hash_Store_Str(dump, "format", 6,
                   (Obj*)CB_newf("%i32",
                                 (int32_t)Snapshot_current_file_format));
    Hash_Store_Str(dump, "subformat", 9,
                   (Obj*)CB_newf("%i32",
                                 (int32_t)Snapshot_current_file_subformat));

    // Serialize to the new file.
    Json_spew_json((Obj*)dump, folder, self->path);
    DECREF(dump);
}
Snapshot*
Snapshot_read_file(Snapshot *self, Folder *folder, const CharBuf *filename) {
    /* Discard prior state, then choose a snapshot file: the caller's,
     * or the newest one on disk. */
    S_zero_out(self);
    self->filename = filename
                   ? CB_Clone(filename)
                   : IxFileNames_latest_snapshot(folder);

    /* No snapshot file means an empty index -- nothing more to do. */
    if (self->filename) {
        Hash *snap_data = (Hash*)ASSERT_IS_A(
            Json_slurp_json(folder, self->filename), HASH);
        Obj *format = ASSERT_IS_A(
            Hash_Fetch_Str(snap_data, "format", 6), OBJ);

        /* Refuse snapshots written by a newer version of the library. */
        if (Obj_To_I64(format) > Snapshot_current_file_format) {
            THROW("Snapshot format too recent: %i64, %i32",
                  Obj_To_I64(format), Snapshot_current_file_format);
        }

        /* Re-populate the entries hash from the stored list. */
        {
            u32_t tick, num_entries;
            VArray *file_list = (VArray*)ASSERT_IS_A(
                Hash_Fetch_Str(snap_data, "entries", 7), VARRAY);
            Hash_Clear(self->entries);
            for (tick = 0, num_entries = VA_Get_Size(file_list);
                 tick < num_entries;
                 tick++
            ) {
                CharBuf *entry = (CharBuf*)ASSERT_IS_A(
                    VA_Fetch(file_list, tick), CHARBUF);
                Hash_Store(self->entries, entry, INCREF(&EMPTY));
            }
        }

        DECREF(snap_data);
    }

    return self;
}
void
Snapshot_write_file(Snapshot *self, Folder *folder, const CharBuf *filename) {
    Hash *all_data = Hash_new(0);
    VArray *files = Snapshot_List(self);

    /* Decide on the output filename: the caller's choice, otherwise
     * derive a fresh "snapshot_<gen>.json" name one generation past the
     * newest snapshot on disk. */
    DECREF(self->filename);
    if (!filename) {
        CharBuf *latest = IxFileNames_latest_snapshot(folder);
        i32_t gen = latest ? IxFileNames_extract_gen(latest) + 1 : 1;
        CharBuf *base_36 = StrHelp_to_base36(gen);
        self->filename = CB_newf("snapshot_%o.json", base_36);
        DECREF(latest);
        DECREF(base_36);
    }
    else {
        self->filename = CB_Clone(filename);
    }

    /* Never clobber an existing snapshot file. */
    if (Folder_Exists(folder, self->filename)) {
        THROW("Snapshot file '%o' already exists", self->filename);
    }

    /* Assemble the JSON-izable structure: sorted file names plus a
     * format version stamp. */
    VA_Sort(files, NULL);
    Hash_Store_Str(all_data, "entries", 7, (Obj*)files);
    Hash_Store_Str(all_data, "format", 6,
                   (Obj*)CB_newf("%i32",
                                 (i32_t)Snapshot_current_file_format));

    /* Serialize to the new file. */
    Json_spew_json((Obj*)all_data, folder, self->filename);
    DECREF(all_data);
}
// Open a PolyReader against an index, retrying when a concurrent Indexer
// updates the index underneath us mid-open.  When `manager` is supplied,
// a deletion lock is held for the duration and a read lock is taken on
// the chosen snapshot file; both locks are released on every error path.
// On success, returns `self` with sub-readers initialized.
PolyReader*
PolyReader_do_open(PolyReader *self, Obj *index, Snapshot *snapshot,
                   IndexManager *manager) {
    PolyReaderIVARS *const ivars = PolyReader_IVARS(self);
    Folder *folder = S_derive_folder(index);
    // Generation of the last snapshot file we attempted; 0 means "none
    // tried yet".  Used to distinguish a race (a newer snapshot appeared,
    // so retry) from a genuine error.
    uint64_t last_gen = 0;

    PolyReader_init(self, NULL, folder, snapshot, manager, NULL);
    DECREF(folder);

    if (manager) {
        if (!S_obtain_deletion_lock(self)) {
            DECREF(self);
            THROW(LOCKERR, "Couldn't get deletion lock");
        }
    }

    while (1) {
        CharBuf *target_snap_file;

        // If a Snapshot was supplied, use its file.
        if (snapshot) {
            target_snap_file = Snapshot_Get_Path(snapshot);
            if (!target_snap_file) {
                THROW(ERR, "Supplied snapshot objects must not be empty");
            }
            else {
                // Balance the DECREF at the bottom of the loop -- the
                // path was only borrowed from the Snapshot.
                CB_Inc_RefCount(target_snap_file);
            }
        }
        else {
            // Otherwise, pick the most recent snap file.
            target_snap_file = IxFileNames_latest_snapshot(folder);
            // No snap file?  Looks like the index is empty.  We can stop
            // now; `self` is returned with no sub-readers.
            if (!target_snap_file) {
                break;
            }
        }

        // Derive "generation" of this snapshot file from its name.
        uint64_t gen = IxFileNames_extract_gen(target_snap_file);

        // Get a read lock on the most recent snapshot file if indicated.
        if (manager) {
            if (!S_obtain_read_lock(self, target_snap_file)) {
                DECREF(self);
                THROW(LOCKERR, "Couldn't get read lock for %o",
                      target_snap_file);
            }
        }

        // Testing only: simulate an Indexer racing with this open by
        // renaming a marker file between snapshot selection and read.
        if (PolyReader_race_condition_debug1) {
            ZombieCharBuf *temp = ZCB_WRAP_STR("temp", 4);
            if (Folder_Exists(folder, (CharBuf*)temp)) {
                bool success = Folder_Rename(folder, (CharBuf*)temp,
                                             PolyReader_race_condition_debug1);
                if (!success) { RETHROW(INCREF(Err_get_error())); }
            }
            PolyReader_debug1_num_passes++;
        }

        // If a Snapshot object was passed in, the file has already been
        // read.  If that's not the case, we must read the file we just
        // picked.
        if (!snapshot) {
            struct try_read_snapshot_context context;
            context.snapshot = ivars->snapshot;
            context.folder   = folder;
            context.path     = target_snap_file;
            // Trap rather than throw, so a failure caused by a racing
            // Indexer can be retried instead of propagated.
            Err *error = Err_trap(S_try_read_snapshot, &context);

            if (error) {
                S_release_read_lock(self);
                DECREF(target_snap_file);
                if (last_gen < gen) { // Index updated, so try again.
                    DECREF(error);
                    last_gen = gen;
                    continue;
                }
                else { // Real error.
                    if (manager) { S_release_deletion_lock(self); }
                    RETHROW(error);
                }
            }
        }

        /* It's possible, though unlikely, for an Indexer to delete files
         * out from underneath us after the snapshot file is read but
         * before we've got SegReaders holding open all the required
         * files.  If we failed to open something, see if we can find a
         * newer snapshot file.  If we can, then the exception was due to
         * the race condition.  If not, we have a real exception, so
         * throw an error. */
        struct try_open_elements_context context;
        context.self        = self;
        context.seg_readers = NULL;
        Err *error = Err_trap(S_try_open_elements, &context);

        if (error) {
            S_release_read_lock(self);
            DECREF(target_snap_file);
            if (last_gen < gen) { // Index updated, so try again.
                DECREF(error);
                last_gen = gen;
            }
            else { // Real error.
                if (manager) { S_release_deletion_lock(self); }
                RETHROW(error);
            }
        }
        else { // Succeeded.
            S_init_sub_readers(self, (VArray*)context.seg_readers);
            DECREF(context.seg_readers);
            DECREF(target_snap_file);
            break;
        }
    }

    if (manager) { S_release_deletion_lock(self); }

    return self;
}
// Initialize an Indexer: acquire the folder's write lock, locate (or
// accept) a Schema and the latest Snapshot, open a PolyReader over the
// existing index (unless truncating), purge leftover files, and set up a
// fresh Segment plus the writers that will populate it.  Throws if the
// write lock cannot be obtained or no Schema can be found.
Indexer*
Indexer_init(Indexer *self, Schema *schema, Obj *index,
             IndexManager *manager, int32_t flags) {
    bool_t    create   = (flags & Indexer_CREATE)   ? true : false;
    bool_t    truncate = (flags & Indexer_TRUNCATE) ? true : false;
    Folder   *folder   = S_init_folder(index, create);
    Lock     *write_lock;
    CharBuf  *latest_snapfile;
    Snapshot *latest_snapshot = Snapshot_new();

    // Init.
    self->stock_doc     = Doc_new(NULL, 0);
    self->truncate      = false;
    self->optimize      = false;
    self->prepared      = false;
    self->needs_commit  = false;
    self->snapfile      = NULL;
    self->merge_lock    = NULL;

    // Assign.
    self->folder       = folder;
    self->manager      = manager
                         ? (IndexManager*)INCREF(manager)
                         : IxManager_new(NULL, NULL);
    IxManager_Set_Folder(self->manager, folder);

    // Get a write lock for this folder.
    write_lock = IxManager_Make_Write_Lock(self->manager);
    Lock_Clear_Stale(write_lock);
    if (Lock_Obtain(write_lock)) {
        // Only assign if successful, otherwise DESTROY unlocks -- bad!
        self->write_lock = write_lock;
    }
    else {
        DECREF(write_lock);
        DECREF(self);
        RETHROW(INCREF(Err_get_error()));
    }

    // Find the latest snapshot or create a new one.
    latest_snapfile = IxFileNames_latest_snapshot(folder);
    if (latest_snapfile) {
        Snapshot_Read_File(latest_snapshot, folder, latest_snapfile);
    }

    // Look for an existing Schema if one wasn't supplied.
    if (schema) {
        self->schema = (Schema*)INCREF(schema);
    }
    else {
        if (!latest_snapfile) {
            THROW(ERR, "No Schema supplied, and can't find one in the index");
        }
        else {
            CharBuf *schema_file = S_find_schema_file(latest_snapshot);
            Hash *dump = (Hash*)Json_slurp_json(folder, schema_file);
            if (dump) { // read file successfully
                self->schema = (Schema*)CERTIFY(
                                   VTable_Load_Obj(SCHEMA, (Obj*)dump),
                                   SCHEMA);
                // From here on, `schema` aliases the Schema we own.
                schema = self->schema;
                DECREF(dump);
                schema_file = NULL;
            }
            else {
                THROW(ERR, "Failed to parse %o", schema_file);
            }
        }
    }

    // If we're clobbering, start with an empty Snapshot and an empty
    // PolyReader.  Otherwise, start with the most recent Snapshot and an
    // up-to-date PolyReader.
    if (truncate) {
        self->snapshot = Snapshot_new();
        self->polyreader = PolyReader_new(schema, folder, NULL, NULL, NULL);
        self->truncate = true;
    }
    else {
        // TODO: clone most recent snapshot rather than read it twice.
        self->snapshot = (Snapshot*)INCREF(latest_snapshot);
        self->polyreader = latest_snapfile
                           ? PolyReader_open((Obj*)folder, NULL, NULL)
                           : PolyReader_new(schema, folder, NULL, NULL, NULL);
        if (latest_snapfile) {
            // Make sure than any existing fields which may have been
            // dynamically added during past indexing sessions get added.
            Schema *old_schema = PolyReader_Get_Schema(self->polyreader);
            Schema_Eat(schema, old_schema);
        }
    }

    // Zap detritus from previous sessions.
    {
        // Note: we have to feed FilePurger with the most recent snapshot
        // file now, but with the Indexer's snapshot later.
        FilePurger *file_purger
            = FilePurger_new(folder, latest_snapshot, self->manager);
        FilePurger_Purge(file_purger);
        DECREF(file_purger);
    }

    // Create a new segment.
    {
        int64_t new_seg_num
            = IxManager_Highest_Seg_Num(self->manager, latest_snapshot) + 1;
        Lock *merge_lock = IxManager_Make_Merge_Lock(self->manager);
        uint32_t i, max;

        if (Lock_Is_Locked(merge_lock)) {
            // If there's a background merge process going on, stay out of
            // its way: start numbering past the merge's cutoff segment.
            Hash *merge_data = IxManager_Read_Merge_Data(self->manager);
            Obj *cutoff_obj = merge_data
                              ? Hash_Fetch_Str(merge_data, "cutoff", 6)
                              : NULL;
            if (!cutoff_obj) {
                DECREF(merge_lock);
                DECREF(merge_data);
                THROW(ERR, "Background merge detected, but can't read merge data");
            }
            else {
                int64_t cutoff = Obj_To_I64(cutoff_obj);
                if (cutoff >= new_seg_num) {
                    new_seg_num = cutoff + 1;
                }
            }
            DECREF(merge_data);
        }

        self->segment = Seg_new(new_seg_num);

        // Add all known fields to Segment.
        {
            VArray *fields = Schema_All_Fields(schema);
            for (i = 0, max = VA_Get_Size(fields); i < max; i++) {
                Seg_Add_Field(self->segment, (CharBuf*)VA_Fetch(fields, i));
            }
            DECREF(fields);
        }

        DECREF(merge_lock);
    }

    // Create new SegWriter and FilePurger.
    self->file_purger
        = FilePurger_new(folder, self->snapshot, self->manager);
    self->seg_writer = SegWriter_new(self->schema, self->snapshot,
                                     self->segment, self->polyreader);
    SegWriter_Prep_Seg_Dir(self->seg_writer);

    // Grab a local ref to the DeletionsWriter.
    self->del_writer = (DeletionsWriter*)INCREF(
                           SegWriter_Get_Del_Writer(self->seg_writer));

    DECREF(latest_snapfile);
    DECREF(latest_snapshot);

    return self;
}