// Scan the top level of `folder` for entries named "snapshot_*.json" and
// return a clone of the one whose extracted generation is the highest.
//
// Returns NULL when no matching entry has a generation greater than zero.
// Rethrows the current error if the directory cannot be opened.
String*
IxFileNames_latest_snapshot(Folder *folder) {
    DirHandle *dir = Folder_Open_Dir(folder, NULL);
    if (!dir) {
        RETHROW(INCREF(Err_get_error()));
    }

    String   *newest     = NULL;
    uint64_t  newest_gen = 0;

    while (DH_Next(dir)) {
        String *candidate = DH_Get_Entry(dir);

        // Anything that isn't "snapshot_<...>.json" is of no interest.
        if (!Str_Starts_With_Utf8(candidate, "snapshot_", 9)
            || !Str_Ends_With_Utf8(candidate, ".json", 5)
           ) {
            DECREF(candidate);
            continue;
        }

        uint64_t candidate_gen = IxFileNames_extract_gen(candidate);
        if (candidate_gen > newest_gen) {
            // Better match: drop the previous clone and keep this one.
            DECREF(newest);
            newest     = Str_Clone(candidate);
            newest_gen = candidate_gen;
        }
        DECREF(candidate);
    }

    DECREF(dir);
    return newest;
}
// Build the file name for the next snapshot.
//
// Walks the manager's folder, finds the highest generation among entries
// named "snapshot_*.json", adds one, and formats the result as
// "snapshot_<base36>.json" using StrHelp_to_base36.  Rethrows the current
// error if the folder cannot be opened.
String*
IxManager_Make_Snapshot_Filename_IMP(IndexManager *self) {
    IndexManagerIVARS *const ivars = IxManager_IVARS(self);
    Folder    *folder = (Folder*)CERTIFY(ivars->folder, FOLDER);
    DirHandle *dir    = Folder_Open_Dir(folder, NULL);
    if (!dir) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Highest snapshot generation seen so far; stays 0 if none are found.
    uint64_t highest_gen = 0;
    while (DH_Next(dir)) {
        String *entry = DH_Get_Entry(dir);
        if (Str_Starts_With_Utf8(entry, "snapshot_", 9)
            && Str_Ends_With_Utf8(entry, ".json", 5)
           ) {
            uint64_t entry_gen = IxFileNames_extract_gen(entry);
            highest_gen = entry_gen > highest_gen ? entry_gen : highest_gen;
        }
        DECREF(entry);
    }
    DECREF(dir);

    // The new snapshot takes the generation after the highest one found.
    uint64_t next_gen = highest_gen + 1;
    char base36[StrHelp_MAX_BASE36_BYTES];
    StrHelp_to_base36(next_gen, &base36);
    return Str_newf("snapshot_%s.json", &base36);
}
// Delete the lock file at `path` if it belongs to this lock and its owner
// qualifies for removal.
//
// The file is only considered when `path` starts with "locks" followed
// (after one more code point) by this lock's name, and when the JSON stored
// in it carries string "host", "name" and "pid" entries whose host and name
// equal this lock's.  It is then deleted when either
//   - `delete_mine` is set and the pid equals PID_getpid(), or
//   - `delete_other` is set and PID_active() reports the pid as inactive.
//
// Returns true if the file was deleted, false if nothing was done.  Throws
// if the deletion was attempted and Folder_Delete failed.
bool
LFLock_Maybe_Delete_File_IMP(LockFileLock *self, String *path,
                             bool delete_mine, bool delete_other) {
    LockFileLockIVARS *const ivars = LFLock_IVARS(self);
    Folder *folder = ivars->folder;

    // Only delete locks that start with our lock name.
    if (!Str_Starts_With_Utf8(path, "locks", 5)) {
        return false;
    }
    StringIterator *cursor = Str_Top(path);
    // Skip "locks" plus one further code point (presumably the path
    // separator -- confirm against the lock path layout).
    StrIter_Advance(cursor, 5 + 1);
    bool name_matches = StrIter_Starts_With(cursor, ivars->name);
    DECREF(cursor);
    if (!name_matches) {
        return false;
    }

    // Nothing to do when the file is not there.
    if (!Folder_Exists(folder, path)) {
        return false;
    }

    // Attempt to delete dead lock file.
    bool  deleted = false;
    Hash *meta    = (Hash*)Json_slurp_json(folder, path);
    if (meta != NULL && Obj_Is_A((Obj*)meta, HASH)) {
        String *pid_str   = (String*)Hash_Fetch_Utf8(meta, "pid", 3);
        String *lock_host = (String*)Hash_Fetch_Utf8(meta, "host", 4);
        String *lock_name = (String*)Hash_Fetch_Utf8(meta, "name", 4);

        // Match hostname and lock name; the pid only has to be a string.
        bool same_owner_kind
            = lock_host != NULL
              && Str_Is_A(lock_host, STRING)
              && Str_Equals(lock_host, (Obj*)ivars->host)
              && lock_name != NULL
              && Str_Is_A(lock_name, STRING)
              && Str_Equals(lock_name, (Obj*)ivars->name)
              && pid_str != NULL
              && Str_Is_A(pid_str, STRING);

        if (same_owner_kind) {
            // Verify that pid is either mine or dead.
            int  pid         = (int)Str_To_I64(pid_str);
            bool is_this_pid = delete_mine && pid == PID_getpid();
            if (is_this_pid || (delete_other && !PID_active(pid))) {
                if (!Folder_Delete(folder, path)) {
                    // Build the message before releasing the hash, then
                    // throw; control does not come back here.
                    String *mess = MAKE_MESS("Can't delete '%o'", path);
                    DECREF(meta);
                    Err_throw_mess(ERR, mess);
                }
                else {
                    deleted = true;
                }
            }
        }
    }
    DECREF(meta);

    return deleted;
}
// Look up the immediate subfolder called `name`.
//
// Returns NULL when `name` is NULL or empty, is not a local entry, starts
// with ".", or is cached in `ivars->entries` as something other than a
// Folder.  A cached Folder is returned directly.  Otherwise, if the full
// path passes S_dir_ok, an FSFolder is opened for it (replaced by a
// CompoundFileReader when "cfmeta.json" exists and the reader opens), the
// result is stored in `ivars->entries` under `name`, and that same pointer
// is returned.  Returns NULL when the path is not an acceptable directory.
//
// Throws if FSFolder_new yields NULL.
Folder*
FSFolder_Local_Find_Folder_IMP(FSFolder *self, String *name) {
    FSFolderIVARS *const ivars = FSFolder_IVARS(self);
    Folder *subfolder = NULL;

    if (!name || !Str_Get_Size(name)) {
        // No entity can be identified by NULL or empty string.
        return NULL;
    }
    else if (!S_is_local_entry(name)) {
        return NULL;
    }
    else if (Str_Starts_With_Utf8(name, ".", 1)) {
        // Don't allow access outside of the main dir.
        return NULL;
    }
    else if (NULL != (subfolder = (Folder*)Hash_Fetch(ivars->entries, (Obj*)name))) {
        // Cached entry: only hand it back if it really is a Folder.
        if (Folder_Is_A(subfolder, FOLDER)) {
            return subfolder;
        }
        else {
            return NULL;
        }
    }

    String *fullpath = S_fullpath(self, name);
    if (S_dir_ok(fullpath)) {
        subfolder = (Folder*)FSFolder_new(fullpath);
        if (!subfolder) {
            // Format the message while `fullpath` is still alive; releasing
            // it first would make the "%o" conversion read a freed String.
            String *mess
                = MAKE_MESS("Failed to open FSFolder at '%o'", fullpath);
            DECREF(fullpath);
            Err_throw_mess(ERR, mess);
        }
        // Try to open a CompoundFileReader. On failure, just use the
        // existing folder.
        String *cfmeta_file = (String*)SSTR_WRAP_UTF8("cfmeta.json", 11);
        if (Folder_Local_Exists(subfolder, cfmeta_file)) {
            CompoundFileReader *cf_reader = CFReader_open(subfolder);
            if (cf_reader) {
                DECREF(subfolder);
                subfolder = (Folder*)cf_reader;
            }
        }
        Hash_Store(ivars->entries, (Obj*)name, (Obj*)subfolder);
    }
    DECREF(fullpath);

    return subfolder;
}
// Return the first entry in the snapshot's file list whose name matches
// "schema_*.json", or NULL if there is none.
//
// NOTE(review): the returned pointer is taken from the list without an
// extra INCREF and the list is released before returning, so the String
// presumably stays alive through a reference held by `snapshot` -- confirm.
static String*
S_find_schema_file(Snapshot *snapshot) {
    Vector       *listing = Snapshot_List(snapshot);
    const size_t  count   = Vec_Get_Size(listing);
    String       *found   = NULL;

    // Stop at the first match.
    for (size_t tick = 0; found == NULL && tick < count; tick++) {
        String *candidate = (String*)Vec_Fetch(listing, tick);
        if (Str_Starts_With_Utf8(candidate, "schema_", 7)
            && Str_Ends_With_Utf8(candidate, ".json", 5)
           ) {
            found = candidate;
        }
    }

    DECREF(listing);
    return found;
}
// Return a new Vector holding every entry of `orig` except those that start
// with "seg_" without being a valid segment name.
//
// Since Snapshot format 2, no DataReader has depended on individual files
// within segment directories being listed.  Filter these files because
// they cause a problem with FilePurger.
static Vector*
S_clean_segment_contents(Vector *orig) {
    Vector   *kept  = Vec_new(Vec_Get_Size(orig));
    uint32_t  total = Vec_Get_Size(orig);

    for (uint32_t tick = 0; tick < total; tick++) {
        String *entry = (String*)Vec_Fetch(orig, tick);

        // Keep valid segment names and anything not prefixed with "seg_";
        // what remains ("seg_..." but not a valid name) is dropped.
        if (Seg_valid_seg_name(entry)
            || !Str_Starts_With_Utf8(entry, "seg_", 4)
           ) {
            Vec_Push(kept, INCREF(entry));
        }
    }

    return kept;
}
// Report whether `name` is a valid segment name: the prefix "seg_" followed
// only by code points for which isalnum() is true.  A bare "seg_" with
// nothing after it is accepted.
bool
Seg_valid_seg_name(String *name) {
    if (!Str_Starts_With_Utf8(name, "seg_", 4)) {
        return false;
    }

    StringIterator *iter = Str_Top(name);
    StrIter_Advance(iter, 4);

    bool    valid = true;
    int32_t code_point;
    while (STR_OOB != (code_point = StrIter_Next(iter))) {
        // isalnum() is only defined for EOF and values representable as
        // unsigned char.  Reject anything outside that range up front
        // instead of passing an arbitrary Unicode code point to it.  Values
        // within 0..0xFF are still classified by isalnum() exactly as
        // before.
        if (code_point < 0
            || code_point > 0xFF
            || !isalnum(code_point)
           ) {
            valid = false;
            break;
        }
    }
    DECREF(iter);

    return valid;
}
// Create a shared lock for the snapshot file `filename`.
//
// The lock name is `filename` with its trailing ".json" removed; the lock
// is made through the manager's lock factory with the arguments 1000 and
// 100.  Throws if `filename` does not look like "snapshot_*.json".
Lock*
IxManager_Make_Snapshot_Read_Lock_IMP(IndexManager *self, String *filename) {
    LockFactory *factory = S_obtain_lock_factory(self);

    if (!(Str_Starts_With_Utf8(filename, "snapshot_", 9)
          && Str_Ends_With_Utf8(filename, ".json", 5))
       ) {
        THROW(ERR, "Not a snapshot filename: %o", filename);
    }

    // Truncate ".json" from end of snapshot file name.
    const size_t suffix_len = sizeof(".json") - 1;
    size_t  stem_len  = Str_Length(filename) - suffix_len;
    String *lock_name = Str_SubString(filename, 0, stem_len);

    Lock *lock = LockFact_Make_Shared_Lock(factory, lock_name, 1000, 100);
    DECREF(lock_name);

    return lock;
}
// Create a LockFileLock for the snapshot file `filename`.
//
// The lock name is `filename` with its trailing ".json" removed; the lock
// is constructed on the manager's folder and host with the arguments 1000,
// 100 and false.  Throws if `filename` does not look like
// "snapshot_*.json".
Lock*
IxManager_Make_Snapshot_Lock_IMP(IndexManager *self, String *filename) {
    IndexManagerIVARS *const ivars = IxManager_IVARS(self);

    if (!(Str_Starts_With_Utf8(filename, "snapshot_", 9)
          && Str_Ends_With_Utf8(filename, ".json", 5))
       ) {
        THROW(ERR, "Not a snapshot filename: %o", filename);
    }

    // Truncate ".json" from end of snapshot file name.
    const size_t suffix_len = sizeof(".json") - 1;
    size_t  stem_len  = Str_Length(filename) - suffix_len;
    String *lock_name = Str_SubString(filename, 0, stem_len);

    Lock *lock = (Lock*)LFLock_new(ivars->folder, lock_name, ivars->host,
                                   1000, 100, false);
    DECREF(lock_name);

    return lock;
}
// Exercise the Simple interface on a RAMFolder: add documents one at a
// time and check the count returned by Simple_Search after each addition,
// then iterate the hits with Simple_Next.
static void
test_simple(TestBatchRunner *runner) {
    RAMFolder *ram_folder = RAMFolder_new(NULL);
    String    *lang       = SSTR_WRAP_UTF8("en", 2);
    Simple    *simple     = Simple_new((Obj*)ram_folder, lang);

    String *food_field = SSTR_WRAP_UTF8("food", 4);

    // One document: a search issued right after the add must see it.
    {
        Doc    *doc  = Doc_new(NULL, 0);
        String *text = SSTR_WRAP_UTF8("creamed corn", 12);
        Doc_Store(doc, food_field, (Obj*)text);
        Simple_Add_Doc(simple, doc);
        DECREF(doc);

        String   *query = SSTR_WRAP_UTF8("creamed", 7);
        uint32_t  hits  = Simple_Search(simple, query, 0, 10);
        TEST_INT_EQ(runner, hits, 1, "Search works right after add");
    }

    // Second document matching the same query.
    {
        Doc    *doc  = Doc_new(NULL, 0);
        String *text = SSTR_WRAP_UTF8("creamed spinach", 15);
        Doc_Store(doc, food_field, (Obj*)text);
        Simple_Add_Doc(simple, doc);
        DECREF(doc);

        String   *query = SSTR_WRAP_UTF8("creamed", 7);
        uint32_t  hits  = Simple_Search(simple, query, 0, 10);
        TEST_INT_EQ(runner, hits, 2, "Search returns total hits");
    }

    // Third document, added just before the Simple object is released and
    // a fresh one is opened on the same folder; all three must be found.
    {
        Doc    *doc  = Doc_new(NULL, 0);
        String *text = SSTR_WRAP_UTF8("creamed broccoli", 16);
        Doc_Store(doc, food_field, (Obj*)text);
        Simple_Add_Doc(simple, doc);
        DECREF(doc);

        DECREF(simple);
        simple = Simple_new((Obj*)ram_folder, lang);

        String   *query = SSTR_WRAP_UTF8("cream", 5);
        uint32_t  hits  = Simple_Search(simple, query, 0, 10);
        TEST_INT_EQ(runner, hits, 3, "commit upon destroy");

        // Walk the hits and check the stored field of each one.
        for (HitDoc *hit = Simple_Next(simple);
             hit != NULL;
             hit = Simple_Next(simple)
            ) {
            String *food = (String*)HitDoc_Extract(hit, food_field);
            TEST_TRUE(runner, Str_Starts_With_Utf8(food, "cream", 5), "Next");
            DECREF(food);
            DECREF(hit);
        }
    }

    // A document that stores its text under a different field name.
    {
        Doc    *doc        = Doc_new(NULL, 0);
        String *band_field = SSTR_WRAP_UTF8("band", 4);
        String *text       = SSTR_WRAP_UTF8("Cream", 5);
        Doc_Store(doc, band_field, (Obj*)text);
        Simple_Add_Doc(simple, doc);
        DECREF(doc);

        String   *query = SSTR_WRAP_UTF8("cream", 5);
        uint32_t  hits  = Simple_Search(simple, query, 0, 10);
        TEST_INT_EQ(runner, hits, 4, "Search uses correct EasyAnalyzer");
    }

    DECREF(simple);
    DECREF(ram_folder);
}
// Work out which files in the purger's folder can be removed.
//
// Files referenced by the purger's own snapshot, and by any other snapshot
// file whose read lock is currently held, are spared.  Files referenced by
// unlocked snapshot files become candidates.  On return `*purgables_ptr`
// receives the candidate names that were not spared and `*snapshots_ptr`
// receives the Snapshot objects read from unlocked snapshot files.
//
// Rethrows the current error if the folder cannot be opened.
static void
S_discover_unused(FilePurger *self, Vector **purgables_ptr,
                  Vector **snapshots_ptr) {
    FilePurgerIVARS *const ivars = FilePurger_IVARS(self);
    Folder    *folder = ivars->folder;
    DirHandle *dir    = Folder_Open_Dir(folder, NULL);
    if (!dir) {
        RETHROW(INCREF(Err_get_error()));
    }

    Vector *keep           = Vec_new(1);
    Vector *unlocked_snaps = Vec_new(1);
    String *current_path   = NULL;

    // Start off with the list of files in the current snapshot.
    if (ivars->snapshot) {
        Vector *entries   = Snapshot_List(ivars->snapshot);
        Vector *in_use    = S_find_all_referenced(folder, entries);
        Vec_Push_All(keep, in_use);
        DECREF(entries);
        DECREF(in_use);

        current_path = Snapshot_Get_Path(ivars->snapshot);
        if (current_path) {
            Vec_Push(keep, INCREF(current_path));
        }
    }

    Hash *candidates = Hash_new(64);
    while (DH_Next(dir)) {
        String *entry = DH_Get_Entry(dir);

        // Skip anything that isn't a snapshot file, and skip the current
        // snapshot's own file, which was handled above.
        if (!Str_Starts_With_Utf8(entry, "snapshot_", 9)
            || !Str_Ends_With_Utf8(entry, ".json", 5)
            || (current_path && Str_Equals(entry, (Obj*)current_path))
           ) {
            DECREF(entry);
            continue;
        }

        Snapshot *snapshot
            = Snapshot_Read_File(Snapshot_new(), folder, entry);
        Lock *lock
            = IxManager_Make_Snapshot_Read_Lock(ivars->manager, entry);
        Vector *snap_list  = Snapshot_List(snapshot);
        Vector *referenced = S_find_all_referenced(folder, snap_list);

        // DON'T obtain the lock -- only see whether another
        // entity holds a lock on the snapshot file.
        bool held_elsewhere = false;
        if (lock) {
            Lock_Clear_Stale(lock);
            held_elsewhere = Lock_Is_Locked(lock);
        }

        if (held_elsewhere) {
            // The snapshot file is locked, which means someone's using
            // that version of the index -- protect all of its entries.
            uint32_t new_size = Vec_Get_Size(keep)
                                + Vec_Get_Size(referenced)
                                + 1;
            Vec_Grow(keep, new_size);
            Vec_Push(keep, (Obj*)Str_Clone(entry));
            Vec_Push_All(keep, referenced);
        }
        else {
            // No one's using this snapshot, so all of its entries are
            // candidates for deletion.
            uint32_t num_referenced = Vec_Get_Size(referenced);
            for (uint32_t tick = 0; tick < num_referenced; tick++) {
                String *file = (String*)Vec_Fetch(referenced, tick);
                Hash_Store(candidates, file, (Obj*)CFISH_TRUE);
            }
            Vec_Push(unlocked_snaps, INCREF(snapshot));
        }

        DECREF(referenced);
        DECREF(snap_list);
        DECREF(snapshot);
        DECREF(lock);
        DECREF(entry);
    }
    DECREF(dir);

    // Clean up after a dead segment consolidation.
    S_zap_dead_merge(self, candidates);

    // Eliminate any current files from the list of files to be purged.
    uint32_t num_kept = Vec_Get_Size(keep);
    for (uint32_t tick = 0; tick < num_kept; tick++) {
        String *filename = (String*)Vec_Fetch(keep, tick);
        DECREF(Hash_Delete(candidates, filename));
    }

    // Pass back purgables and Snapshots.
    *purgables_ptr = Hash_Keys(candidates);
    *snapshots_ptr = unlocked_snaps;

    DECREF(candidates);
    DECREF(keep);
}
// First phase of a background-merge commit.
//
// Runs the merge if the PolyReader has any segment readers.  When nothing
// was merged, only `prepared` is set and the function returns with
// `needs_commit` untouched.  Otherwise it finishes the writers, obtains the
// write lock, writes a temporary snapshot file, folds in any changes made
// to the index while the merge was running, and sets `needs_commit`.
//
// Throws if called a second time, and rethrows the current error if the
// write lock is not held after S_obtain_write_lock.
void
BGMerger_Prepare_Commit_IMP(BackgroundMerger *self) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);
    Vector   *seg_readers = PolyReader_Get_Seg_Readers(ivars->polyreader);
    uint32_t  num_seg_readers = Vec_Get_Size(seg_readers);
    uint32_t  segs_merged     = 0;

    if (ivars->prepared) {
        THROW(ERR, "Can't call Prepare_Commit() more than once");
    }

    // Maybe merge existing index data.
    if (num_seg_readers) {
        segs_merged = S_maybe_merge(self);
    }

    // Nothing merged.  Leave `needs_commit` false and bail out.
    if (!segs_merged) {
        ivars->prepared = true;
        return;
    }

    // Finish the segment and write a new snapshot file.
    Folder   *folder   = ivars->folder;
    Snapshot *snapshot = ivars->snapshot;

    // Write out new deletions, but only if they haven't all been applied
    // (i.e. not every segment was merged away).
    if (DelWriter_Updated(ivars->del_writer)
        && segs_merged != num_seg_readers
       ) {
        DelWriter_Finish(ivars->del_writer);
    }

    // Finish the segment.
    SegWriter_Finish(ivars->seg_writer);

    // Grab the write lock.
    S_obtain_write_lock(self);
    if (!ivars->write_lock) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Write temporary snapshot file: the next snapshot name plus ".temp".
    DECREF(ivars->snapfile);
    String *next_name = IxManager_Make_Snapshot_Filename(ivars->manager);
    ivars->snapfile = Str_Cat_Trusted_Utf8(next_name, ".temp", 5);
    DECREF(next_name);
    Folder_Delete(folder, ivars->snapfile);
    Snapshot_Write_File(snapshot, folder, ivars->snapfile);

    // Determine whether the index has been updated while this background
    // merge process was running.
    String *start_snapfile
        = Snapshot_Get_Path(PolyReader_Get_Snapshot(ivars->polyreader));
    Snapshot *latest_snapshot
        = Snapshot_Read_File(Snapshot_new(), ivars->folder, NULL);
    String *latest_snapfile = Snapshot_Get_Path(latest_snapshot);
    bool index_updated
        = !Str_Equals(start_snapfile, (Obj*)latest_snapfile);

    if (index_updated) {
        /* See if new deletions have been applied since this
         * background merge process started against any of the
         * segments we just merged away.  If that's true, we need to
         * write another segment which applies the deletions against
         * the new composite segment.
         */
        S_merge_updated_deletions(self);

        // Add the fresh content to our snapshot. (It's important to
        // run this AFTER S_merge_updated_deletions, because otherwise
        // we couldn't tell whether the deletion counts changed.)
        Vector   *latest_files = Snapshot_List(latest_snapshot);
        uint32_t  num_files    = Vec_Get_Size(latest_files);
        for (uint32_t tick = 0; tick < num_files; tick++) {
            String *file = (String*)Vec_Fetch(latest_files, tick);
            if (!Str_Starts_With_Utf8(file, "seg_", 4)) {
                continue;
            }
            // Only entries with a generation above the cutoff are added.
            int64_t gen = (int64_t)IxFileNames_extract_gen(file);
            if (gen > ivars->cutoff) {
                Snapshot_Add_Entry(ivars->snapshot, file);
            }
        }
        DECREF(latest_files);

        // Since the snapshot content has changed, we need to rewrite it.
        Folder_Delete(folder, ivars->snapfile);
        Snapshot_Write_File(snapshot, folder, ivars->snapfile);
    }

    DECREF(latest_snapshot);

    ivars->needs_commit = true;

    // Close reader, so that we can delete its files if appropriate.
    PolyReader_Close(ivars->polyreader);

    ivars->prepared = true;
}
void S_try_open_elements(void *context) { struct try_open_elements_context *args = (struct try_open_elements_context*)context; PolyReader *self = args->self; PolyReaderIVARS *const ivars = PolyReader_IVARS(self); VArray *files = Snapshot_List(ivars->snapshot); Folder *folder = PolyReader_Get_Folder(self); uint32_t num_segs = 0; uint64_t latest_schema_gen = 0; String *schema_file = NULL; // Find schema file, count segments. for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) { String *entry = (String*)VA_Fetch(files, i); if (Seg_valid_seg_name(entry)) { num_segs++; } else if (Str_Starts_With_Utf8(entry, "schema_", 7) && Str_Ends_With_Utf8(entry, ".json", 5) ) { uint64_t gen = IxFileNames_extract_gen(entry); if (gen > latest_schema_gen) { latest_schema_gen = gen; schema_file = entry; } } } // Read Schema. if (!schema_file) { DECREF(files); THROW(ERR, "Can't find a schema file."); } else { Obj *dump = Json_slurp_json(folder, schema_file); if (dump) { // read file successfully DECREF(ivars->schema); ivars->schema = (Schema*)CERTIFY(Freezer_load(dump), SCHEMA); DECREF(dump); schema_file = NULL; } else { String *mess = MAKE_MESS("Failed to parse %o", schema_file); DECREF(files); Err_throw_mess(ERR, mess); } } VArray *segments = VA_new(num_segs); for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) { String *entry = (String*)VA_Fetch(files, i); // Create a Segment for each segmeta. if (Seg_valid_seg_name(entry)) { int64_t seg_num = IxFileNames_extract_gen(entry); Segment *segment = Seg_new(seg_num); // Bail if reading the file fails (probably because it's been // deleted and a new snapshot file has been written so we need to // retry). if (Seg_Read_File(segment, folder)) { VA_Push(segments, (Obj*)segment); } else { String *mess = MAKE_MESS("Failed to read %o", entry); DECREF(segment); DECREF(segments); DECREF(files); Err_throw_mess(ERR, mess); } } } // Sort the segments by age. VA_Sort(segments, NULL, NULL); // Open individual SegReaders. 
struct try_open_segreader_context seg_context; seg_context.schema = PolyReader_Get_Schema(self); seg_context.folder = folder; seg_context.snapshot = PolyReader_Get_Snapshot(self); seg_context.segments = segments; seg_context.result = NULL; args->seg_readers = VA_new(num_segs); Err *error = NULL; for (uint32_t seg_tick = 0; seg_tick < num_segs; seg_tick++) { seg_context.seg_tick = seg_tick; error = Err_trap(S_try_open_segreader, &seg_context); if (error) { break; } VA_Push(args->seg_readers, (Obj*)seg_context.result); seg_context.result = NULL; } DECREF(segments); DECREF(files); if (error) { DECREF(args->seg_readers); args->seg_readers = NULL; RETHROW(error); } }
// Report whether `path` begins with the first byte of DIR_SEP.
static bool
S_is_absolute(String *path) {
    bool leads_with_sep = Str_Starts_With_Utf8(path, DIR_SEP, 1);
    return leads_with_sep;
}