// Mark the leftovers of a dead background merge (the cutoff segment's files
// plus merge.json) as candidates for deletion.
static void
S_zap_dead_merge(FilePurger *self, Hash *candidates) {
    IndexManager *manager    = self->manager;
    Lock         *merge_lock = IxManager_Make_Merge_Lock(manager);

    Lock_Clear_Stale(merge_lock);
    if (!Lock_Is_Locked(merge_lock)) {
        Hash *merge_data = IxManager_Read_Merge_Data(manager);
        Obj  *cutoff = merge_data
                       ? Hash_Fetch_Str(merge_data, "cutoff", 6)
                       : NULL;

        if (cutoff) {
            CharBuf *cutoff_seg = Seg_num_to_name(Obj_To_I64(cutoff));
            if (Folder_Exists(self->folder, cutoff_seg)) {
                ZombieCharBuf *merge_json = ZCB_WRAP_STR("merge.json", 10);
                DirHandle *dh = Folder_Open_Dir(self->folder, cutoff_seg);
                // The DirHandle owns `entry` and overwrites it in place on
                // each call to DH_Next().
                CharBuf *entry = dh ? DH_Get_Entry(dh) : NULL;
                CharBuf *filepath = CB_new(32);

                if (!dh) {
                    THROW(ERR, "Can't open segment dir '%o'", cutoff_seg);
                }

                Hash_Store(candidates, (Obj*)cutoff_seg, INCREF(&EMPTY));
                Hash_Store(candidates, (Obj*)merge_json, INCREF(&EMPTY));

                while (DH_Next(dh)) {
                    // TODO: recursively delete subdirs within seg dir.
                    CB_setf(filepath, "%o/%o", cutoff_seg, entry);
                    Hash_Store(candidates, (Obj*)filepath, INCREF(&EMPTY));
                }

                DECREF(filepath);
                DECREF(dh);
            }
            DECREF(cutoff_seg);
        }

        DECREF(merge_data);
    }

    DECREF(merge_lock);
    return;
}
static void
S_zap_dead_merge(FilePurger *self, Hash *candidates) {
    FilePurgerIVARS *const ivars = FilePurger_IVARS(self);
    IndexManager *manager    = ivars->manager;
    Lock         *merge_lock = IxManager_Make_Merge_Lock(manager);

    Lock_Clear_Stale(merge_lock);
    if (!Lock_Is_Locked(merge_lock)) {
        Hash *merge_data = IxManager_Read_Merge_Data(manager);
        Obj  *cutoff = merge_data
                       ? Hash_Fetch_Utf8(merge_data, "cutoff", 6)
                       : NULL;

        if (cutoff) {
            String *cutoff_seg = Seg_num_to_name(Json_obj_to_i64(cutoff));
            if (Folder_Exists(ivars->folder, cutoff_seg)) {
                String *merge_json = SSTR_WRAP_UTF8("merge.json", 10);
                DirHandle *dh = Folder_Open_Dir(ivars->folder, cutoff_seg);

                if (!dh) {
                    THROW(ERR, "Can't open segment dir '%o'", cutoff_seg);
                }

                Hash_Store(candidates, cutoff_seg, (Obj*)CFISH_TRUE);
                Hash_Store(candidates, merge_json, (Obj*)CFISH_TRUE);

                while (DH_Next(dh)) {
                    // TODO: recursively delete subdirs within seg dir.
                    String *entry    = DH_Get_Entry(dh);
                    String *filepath = Str_newf("%o/%o", cutoff_seg, entry);
                    Hash_Store(candidates, filepath, (Obj*)CFISH_TRUE);
                    DECREF(filepath);
                    DECREF(entry);
                }

                DECREF(dh);
            }
            DECREF(cutoff_seg);
        }

        DECREF(merge_data);
    }

    DECREF(merge_lock);
    return;
}
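/* The TODO above notes that files inside subdirectories of the dead merge's
 * segment directory are not collected.  The helper below is a hypothetical
 * sketch of that recursion, written against the newer String-based API used
 * in the second variant; S_add_seg_entries_recursive() is not part of the
 * Lucy source, and it assumes DH_Entry_Is_Dir() is available on the
 * DirHandle returned by Folder_Open_Dir().
 */
static void
S_add_seg_entries_recursive(Folder *folder, String *dir, Hash *candidates) {
    DirHandle *dh = Folder_Open_Dir(folder, dir);
    if (!dh) {
        THROW(ERR, "Can't open dir '%o'", dir);
    }

    while (DH_Next(dh)) {
        String *entry    = DH_Get_Entry(dh);
        String *filepath = Str_newf("%o/%o", dir, entry);
        if (DH_Entry_Is_Dir(dh)) {
            // Collect the subdirectory's contents before the subdirectory
            // itself, so that a reverse-sorted purge empties it first.
            S_add_seg_entries_recursive(folder, filepath, candidates);
        }
        Hash_Store(candidates, filepath, (Obj*)CFISH_TRUE);
        DECREF(filepath);
        DECREF(entry);
    }

    DECREF(dh);
}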
static void
S_discover_unused(FilePurger *self, VArray **purgables_ptr,
                  VArray **snapshots_ptr) {
    Folder *folder = self->folder;
    DirHandle *dh = Folder_Open_Dir(folder, NULL);
    if (!dh) { RETHROW(INCREF(Err_get_error())); }
    VArray *spared = VA_new(1);
    VArray *snapshots = VA_new(1);
    CharBuf *snapfile = NULL;

    // Start off with the list of files in the current snapshot.
    if (self->snapshot) {
        VArray *entries    = Snapshot_List(self->snapshot);
        VArray *referenced = S_find_all_referenced(folder, entries);
        VA_Push_VArray(spared, referenced);
        DECREF(entries);
        DECREF(referenced);
        snapfile = Snapshot_Get_Path(self->snapshot);
        if (snapfile) { VA_Push(spared, INCREF(snapfile)); }
    }

    CharBuf *entry      = DH_Get_Entry(dh);
    Hash    *candidates = Hash_new(64);
    while (DH_Next(dh)) {
        if (!CB_Starts_With_Str(entry, "snapshot_", 9))        { continue; }
        else if (!CB_Ends_With_Str(entry, ".json", 5))         { continue; }
        else if (snapfile && CB_Equals(entry, (Obj*)snapfile)) { continue; }
        else {
            Snapshot *snapshot
                = Snapshot_Read_File(Snapshot_new(), folder, entry);
            Lock *lock
                = IxManager_Make_Snapshot_Read_Lock(self->manager, entry);
            VArray *snap_list  = Snapshot_List(snapshot);
            VArray *referenced = S_find_all_referenced(folder, snap_list);

            // DON'T obtain the lock -- only see whether another
            // entity holds a lock on the snapshot file.
            if (lock) {
                Lock_Clear_Stale(lock);
            }
            if (lock && Lock_Is_Locked(lock)) {
                // The snapshot file is locked, which means someone's using
                // that version of the index -- protect all of its entries.
                uint32_t new_size = VA_Get_Size(spared)
                                    + VA_Get_Size(referenced)
                                    + 1;
                VA_Grow(spared, new_size);
                VA_Push(spared, (Obj*)CB_Clone(entry));
                VA_Push_VArray(spared, referenced);
            }
            else {
                // No one's using this snapshot, so all of its entries are
                // candidates for deletion.
                for (uint32_t i = 0, max = VA_Get_Size(referenced); i < max; i++) {
                    CharBuf *file = (CharBuf*)VA_Fetch(referenced, i);
                    Hash_Store(candidates, (Obj*)file, INCREF(&EMPTY));
                }
                VA_Push(snapshots, INCREF(snapshot));
            }

            DECREF(referenced);
            DECREF(snap_list);
            DECREF(snapshot);
            DECREF(lock);
        }
    }
    DECREF(dh);

    // Clean up after a dead segment consolidation.
    S_zap_dead_merge(self, candidates);

    // Eliminate any current files from the list of files to be purged.
    for (uint32_t i = 0, max = VA_Get_Size(spared); i < max; i++) {
        CharBuf *filename = (CharBuf*)VA_Fetch(spared, i);
        DECREF(Hash_Delete(candidates, (Obj*)filename));
    }

    // Pass back purgables and Snapshots.
    *purgables_ptr = Hash_Keys(candidates);
    *snapshots_ptr = snapshots;

    DECREF(candidates);
    DECREF(spared);
}
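/* A simplified, hypothetical consumer of S_discover_unused(), shown only to
 * illustrate the contract of its out-parameters: `purgables` holds paths no
 * live snapshot references, and `snapshots` holds the dead Snapshot objects
 * they came from.  The sketch follows the older CharBuf-era API used above;
 * it is not Lucy's actual Purge routine.
 */
static void
S_purge_unused_sketch(FilePurger *self) {
    VArray *purgables;
    VArray *snapshots;
    S_discover_unused(self, &purgables, &snapshots);

    // Delete in reverse lexical order so that a directory is only removed
    // after the entries beneath it.
    VA_Sort(purgables, NULL, NULL);
    for (uint32_t i = VA_Get_Size(purgables); i--;) {
        CharBuf *entry = (CharBuf*)VA_Fetch(purgables, i);
        if (!Folder_Delete(self->folder, entry)) {
            // Deletion can fail if another process still holds the file
            // open; a later purge gets another chance.
        }
    }

    DECREF(purgables);
    DECREF(snapshots);
}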
Indexer*
Indexer_init(Indexer *self, Schema *schema, Obj *index,
             IndexManager *manager, int32_t flags) {
    bool_t    create   = (flags & Indexer_CREATE)   ? true : false;
    bool_t    truncate = (flags & Indexer_TRUNCATE) ? true : false;
    Folder   *folder   = S_init_folder(index, create);
    Lock     *write_lock;
    CharBuf  *latest_snapfile;
    Snapshot *latest_snapshot = Snapshot_new();

    // Init.
    self->stock_doc    = Doc_new(NULL, 0);
    self->truncate     = false;
    self->optimize     = false;
    self->prepared     = false;
    self->needs_commit = false;
    self->snapfile     = NULL;
    self->merge_lock   = NULL;

    // Assign.
    self->folder  = folder;
    self->manager = manager
                    ? (IndexManager*)INCREF(manager)
                    : IxManager_new(NULL, NULL);
    IxManager_Set_Folder(self->manager, folder);

    // Get a write lock for this folder.
    write_lock = IxManager_Make_Write_Lock(self->manager);
    Lock_Clear_Stale(write_lock);
    if (Lock_Obtain(write_lock)) {
        // Only assign if successful, otherwise DESTROY unlocks -- bad!
        self->write_lock = write_lock;
    }
    else {
        DECREF(write_lock);
        DECREF(self);
        RETHROW(INCREF(Err_get_error()));
    }

    // Find the latest snapshot or create a new one.
    latest_snapfile = IxFileNames_latest_snapshot(folder);
    if (latest_snapfile) {
        Snapshot_Read_File(latest_snapshot, folder, latest_snapfile);
    }

    // Look for an existing Schema if one wasn't supplied.
    if (schema) {
        self->schema = (Schema*)INCREF(schema);
    }
    else {
        if (!latest_snapfile) {
            THROW(ERR, "No Schema supplied, and can't find one in the index");
        }
        else {
            CharBuf *schema_file = S_find_schema_file(latest_snapshot);
            Hash *dump = (Hash*)Json_slurp_json(folder, schema_file);
            if (dump) { // read file successfully
                self->schema = (Schema*)CERTIFY(
                                   VTable_Load_Obj(SCHEMA, (Obj*)dump),
                                   SCHEMA);
                schema = self->schema;
                DECREF(dump);
                schema_file = NULL;
            }
            else {
                THROW(ERR, "Failed to parse %o", schema_file);
            }
        }
    }

    // If we're clobbering, start with an empty Snapshot and an empty
    // PolyReader.  Otherwise, start with the most recent Snapshot and an
    // up-to-date PolyReader.
    if (truncate) {
        self->snapshot   = Snapshot_new();
        self->polyreader = PolyReader_new(schema, folder, NULL, NULL, NULL);
        self->truncate   = true;
    }
    else {
        // TODO: clone most recent snapshot rather than read it twice.
        self->snapshot   = (Snapshot*)INCREF(latest_snapshot);
        self->polyreader = latest_snapfile
                           ? PolyReader_open((Obj*)folder, NULL, NULL)
                           : PolyReader_new(schema, folder, NULL, NULL, NULL);
        if (latest_snapfile) {
            // Make sure that any existing fields which may have been
            // dynamically added during past indexing sessions get added.
            Schema *old_schema = PolyReader_Get_Schema(self->polyreader);
            Schema_Eat(schema, old_schema);
        }
    }

    // Zap detritus from previous sessions.
    {
        // Note: we have to feed FilePurger with the most recent snapshot file
        // now, but with the Indexer's snapshot later.
        FilePurger *file_purger
            = FilePurger_new(folder, latest_snapshot, self->manager);
        FilePurger_Purge(file_purger);
        DECREF(file_purger);
    }

    // Create a new segment.
    {
        int64_t new_seg_num
            = IxManager_Highest_Seg_Num(self->manager, latest_snapshot) + 1;
        Lock *merge_lock = IxManager_Make_Merge_Lock(self->manager);
        uint32_t i, max;

        if (Lock_Is_Locked(merge_lock)) {
            // If there's a background merge process going on, stay out of
            // its way.
            Hash *merge_data = IxManager_Read_Merge_Data(self->manager);
            Obj *cutoff_obj = merge_data
                              ? Hash_Fetch_Str(merge_data, "cutoff", 6)
                              : NULL;
            if (!cutoff_obj) {
                DECREF(merge_lock);
                DECREF(merge_data);
                THROW(ERR, "Background merge detected, but can't read merge data");
            }
            else {
                int64_t cutoff = Obj_To_I64(cutoff_obj);
                if (cutoff >= new_seg_num) {
                    // (A worked sketch of this arithmetic follows this
                    // function.)
                    new_seg_num = cutoff + 1;
                }
            }
            DECREF(merge_data);
        }

        self->segment = Seg_new(new_seg_num);

        // Add all known fields to Segment.
        {
            VArray *fields = Schema_All_Fields(schema);
            for (i = 0, max = VA_Get_Size(fields); i < max; i++) {
                Seg_Add_Field(self->segment, (CharBuf*)VA_Fetch(fields, i));
            }
            DECREF(fields);
        }

        DECREF(merge_lock);
    }

    // Create new SegWriter and FilePurger.
    self->file_purger = FilePurger_new(folder, self->snapshot, self->manager);
    self->seg_writer  = SegWriter_new(self->schema, self->snapshot,
                                      self->segment, self->polyreader);
    SegWriter_Prep_Seg_Dir(self->seg_writer);

    // Grab a local ref to the DeletionsWriter.
    self->del_writer = (DeletionsWriter*)INCREF(
                           SegWriter_Get_Del_Writer(self->seg_writer));

    DECREF(latest_snapfile);
    DECREF(latest_snapshot);

    return self;
}
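/* A worked sketch of the merge-cutoff arithmetic in Indexer_init() above
 * (not library code): if the highest segment in the latest snapshot is seg_6
 * and a background merge has recorded cutoff = 8 in merge.json, the Indexer
 * must open seg_9 rather than seg_7 so it never collides with a segment the
 * merger is writing.  For example, with these inputs the function returns
 * S_choose_new_seg_num_sketch(6, 8) == 9 and S_choose_new_seg_num_sketch(6, 0) == 7.
 */
static int64_t
S_choose_new_seg_num_sketch(int64_t highest_seg_num, int64_t merge_cutoff) {
    int64_t new_seg_num = highest_seg_num + 1;  // default: next segment number
    if (merge_cutoff >= new_seg_num) {          // merger already claimed it
        new_seg_num = merge_cutoff + 1;         // skip past the cutoff
    }
    return new_seg_num;
}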