BackgroundMerger* BGMerger_init(BackgroundMerger *self, Obj *index, IndexManager *manager) { BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self); Folder *folder = S_init_folder(index); // Init. ivars->optimize = false; ivars->prepared = false; ivars->needs_commit = false; ivars->snapfile = NULL; ivars->doc_maps = Hash_new(0); // Assign. ivars->folder = folder; if (manager) { ivars->manager = (IndexManager*)INCREF(manager); } else { ivars->manager = IxManager_new(NULL, NULL); IxManager_Set_Write_Lock_Timeout(ivars->manager, 10000); } IxManager_Set_Folder(ivars->manager, folder); // Obtain write lock (which we'll only hold briefly), then merge lock. S_obtain_write_lock(self); if (!ivars->write_lock) { DECREF(self); RETHROW(INCREF(Err_get_error())); } S_obtain_merge_lock(self); if (!ivars->merge_lock) { DECREF(self); RETHROW(INCREF(Err_get_error())); } // Find the latest snapshot. If there's no index content, bail early. ivars->snapshot = Snapshot_Read_File(Snapshot_new(), folder, NULL); if (!Snapshot_Get_Path(ivars->snapshot)) { S_release_write_lock(self); S_release_merge_lock(self); return self; } // Create FilePurger. Zap detritus from previous sessions. ivars->file_purger = FilePurger_new(folder, ivars->snapshot, ivars->manager); FilePurger_Purge(ivars->file_purger); // Open a PolyReader, passing in the IndexManager so we get a read lock on // the Snapshot's files -- so that Indexers don't zap our files while // we're operating in the background. ivars->polyreader = PolyReader_open((Obj*)folder, NULL, ivars->manager); // Clone the PolyReader's schema. Obj *dump = (Obj*)Schema_Dump(PolyReader_Get_Schema(ivars->polyreader)); ivars->schema = (Schema*)CERTIFY(Freezer_load(dump), SCHEMA); DECREF(dump); // Create new Segment. int64_t new_seg_num = IxManager_Highest_Seg_Num(ivars->manager, ivars->snapshot) + 1; Vector *fields = Schema_All_Fields(ivars->schema); ivars->segment = Seg_new(new_seg_num); for (uint32_t i = 0, max = Vec_Get_Size(fields); i < max; i++) { Seg_Add_Field(ivars->segment, (String*)Vec_Fetch(fields, i)); } DECREF(fields); // Our "cutoff" is the segment this BackgroundMerger will write. Now that // we've determined the cutoff, write the merge data file. ivars->cutoff = Seg_Get_Number(ivars->segment); IxManager_Write_Merge_Data(ivars->manager, ivars->cutoff); /* Create the SegWriter but hold off on preparing the new segment * directory -- because if we don't need to merge any segments we don't * need it. (We've reserved the dir by plopping down the merge.json * file.) */ ivars->seg_writer = SegWriter_new(ivars->schema, ivars->snapshot, ivars->segment, ivars->polyreader); // Grab a local ref to the DeletionsWriter. ivars->del_writer = (DeletionsWriter*)INCREF(SegWriter_Get_Del_Writer(ivars->seg_writer)); // Release the write lock. Now new Indexers can start while we work in // the background. S_release_write_lock(self); return self; }
void S_try_open_elements(void *context) { struct try_open_elements_context *args = (struct try_open_elements_context*)context; PolyReader *self = args->self; PolyReaderIVARS *const ivars = PolyReader_IVARS(self); VArray *files = Snapshot_List(ivars->snapshot); Folder *folder = PolyReader_Get_Folder(self); uint32_t num_segs = 0; uint64_t latest_schema_gen = 0; String *schema_file = NULL; // Find schema file, count segments. for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) { String *entry = (String*)VA_Fetch(files, i); if (Seg_valid_seg_name(entry)) { num_segs++; } else if (Str_Starts_With_Utf8(entry, "schema_", 7) && Str_Ends_With_Utf8(entry, ".json", 5) ) { uint64_t gen = IxFileNames_extract_gen(entry); if (gen > latest_schema_gen) { latest_schema_gen = gen; schema_file = entry; } } } // Read Schema. if (!schema_file) { DECREF(files); THROW(ERR, "Can't find a schema file."); } else { Obj *dump = Json_slurp_json(folder, schema_file); if (dump) { // read file successfully DECREF(ivars->schema); ivars->schema = (Schema*)CERTIFY(Freezer_load(dump), SCHEMA); DECREF(dump); schema_file = NULL; } else { String *mess = MAKE_MESS("Failed to parse %o", schema_file); DECREF(files); Err_throw_mess(ERR, mess); } } VArray *segments = VA_new(num_segs); for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) { String *entry = (String*)VA_Fetch(files, i); // Create a Segment for each segmeta. if (Seg_valid_seg_name(entry)) { int64_t seg_num = IxFileNames_extract_gen(entry); Segment *segment = Seg_new(seg_num); // Bail if reading the file fails (probably because it's been // deleted and a new snapshot file has been written so we need to // retry). if (Seg_Read_File(segment, folder)) { VA_Push(segments, (Obj*)segment); } else { String *mess = MAKE_MESS("Failed to read %o", entry); DECREF(segment); DECREF(segments); DECREF(files); Err_throw_mess(ERR, mess); } } } // Sort the segments by age. VA_Sort(segments, NULL, NULL); // Open individual SegReaders. struct try_open_segreader_context seg_context; seg_context.schema = PolyReader_Get_Schema(self); seg_context.folder = folder; seg_context.snapshot = PolyReader_Get_Snapshot(self); seg_context.segments = segments; seg_context.result = NULL; args->seg_readers = VA_new(num_segs); Err *error = NULL; for (uint32_t seg_tick = 0; seg_tick < num_segs; seg_tick++) { seg_context.seg_tick = seg_tick; error = Err_trap(S_try_open_segreader, &seg_context); if (error) { break; } VA_Push(args->seg_readers, (Obj*)seg_context.result); seg_context.result = NULL; } DECREF(segments); DECREF(files); if (error) { DECREF(args->seg_readers); args->seg_readers = NULL; RETHROW(error); } }
static bool S_merge_updated_deletions(BackgroundMerger *self) { BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self); Hash *updated_deletions = NULL; PolyReader *new_polyreader = PolyReader_open((Obj*)ivars->folder, NULL, NULL); Vector *new_seg_readers = PolyReader_Get_Seg_Readers(new_polyreader); Vector *old_seg_readers = PolyReader_Get_Seg_Readers(ivars->polyreader); Hash *new_segs = Hash_new(Vec_Get_Size(new_seg_readers)); for (uint32_t i = 0, max = Vec_Get_Size(new_seg_readers); i < max; i++) { SegReader *seg_reader = (SegReader*)Vec_Fetch(new_seg_readers, i); String *seg_name = SegReader_Get_Seg_Name(seg_reader); Hash_Store(new_segs, seg_name, INCREF(seg_reader)); } for (uint32_t i = 0, max = Vec_Get_Size(old_seg_readers); i < max; i++) { SegReader *seg_reader = (SegReader*)Vec_Fetch(old_seg_readers, i); String *seg_name = SegReader_Get_Seg_Name(seg_reader); // If this segment was merged away... if (Hash_Fetch(ivars->doc_maps, seg_name)) { SegReader *new_seg_reader = (SegReader*)CERTIFY( Hash_Fetch(new_segs, seg_name), SEGREADER); int32_t old_del_count = SegReader_Del_Count(seg_reader); int32_t new_del_count = SegReader_Del_Count(new_seg_reader); // ... were any new deletions applied against it? if (old_del_count != new_del_count) { DeletionsReader *del_reader = (DeletionsReader*)SegReader_Obtain( new_seg_reader, Class_Get_Name(DELETIONSREADER)); if (!updated_deletions) { updated_deletions = Hash_new(max); } Hash_Store(updated_deletions, seg_name, (Obj*)DelReader_Iterator(del_reader)); } } } DECREF(new_polyreader); DECREF(new_segs); if (!updated_deletions) { return false; } else { PolyReader *merge_polyreader = PolyReader_open((Obj*)ivars->folder, ivars->snapshot, NULL); Vector *merge_seg_readers = PolyReader_Get_Seg_Readers(merge_polyreader); Snapshot *latest_snapshot = Snapshot_Read_File(Snapshot_new(), ivars->folder, NULL); int64_t new_seg_num = IxManager_Highest_Seg_Num(ivars->manager, latest_snapshot) + 1; Segment *new_segment = Seg_new(new_seg_num); SegWriter *seg_writer = SegWriter_new(ivars->schema, ivars->snapshot, new_segment, merge_polyreader); DeletionsWriter *del_writer = SegWriter_Get_Del_Writer(seg_writer); int64_t merge_seg_num = Seg_Get_Number(ivars->segment); uint32_t seg_tick = INT32_MAX; int32_t offset = INT32_MAX; SegWriter_Prep_Seg_Dir(seg_writer); for (uint32_t i = 0, max = Vec_Get_Size(merge_seg_readers); i < max; i++) { SegReader *seg_reader = (SegReader*)Vec_Fetch(merge_seg_readers, i); if (SegReader_Get_Seg_Num(seg_reader) == merge_seg_num) { I32Array *offsets = PolyReader_Offsets(merge_polyreader); seg_tick = i; offset = I32Arr_Get(offsets, seg_tick); DECREF(offsets); } } if (offset == INT32_MAX) { THROW(ERR, "Failed sanity check"); } HashIterator *iter = HashIter_new(updated_deletions); while (HashIter_Next(iter)) { String *seg_name = HashIter_Get_Key(iter); Matcher *deletions = (Matcher*)HashIter_Get_Value(iter); I32Array *doc_map = (I32Array*)CERTIFY( Hash_Fetch(ivars->doc_maps, seg_name), I32ARRAY); int32_t del; while (0 != (del = Matcher_Next(deletions))) { // Find the slot where the deleted doc resides in the // rewritten segment. If the doc was already deleted when we // were merging, do nothing. int32_t remapped = I32Arr_Get(doc_map, del); if (remapped) { // It's a new deletion, so carry it forward and zap it in // the rewritten segment. DelWriter_Delete_By_Doc_ID(del_writer, remapped + offset); } } } DECREF(iter); // Finish the segment and clean up. DelWriter_Finish(del_writer); SegWriter_Finish(seg_writer); DECREF(seg_writer); DECREF(new_segment); DECREF(latest_snapshot); DECREF(merge_polyreader); DECREF(updated_deletions); } return true; }
Indexer* Indexer_init(Indexer *self, Schema *schema, Obj *index, IndexManager *manager, int32_t flags) { IndexerIVARS *const ivars = Indexer_IVARS(self); bool create = (flags & Indexer_CREATE) ? true : false; bool truncate = (flags & Indexer_TRUNCATE) ? true : false; Folder *folder = S_init_folder(index, create); Snapshot *latest_snapshot = Snapshot_new(); // Init. ivars->stock_doc = Doc_new(NULL, 0); ivars->truncate = false; ivars->optimize = false; ivars->prepared = false; ivars->needs_commit = false; ivars->snapfile = NULL; ivars->merge_lock = NULL; // Assign. ivars->folder = folder; ivars->manager = manager ? (IndexManager*)INCREF(manager) : IxManager_new(NULL, NULL); IxManager_Set_Folder(ivars->manager, folder); // Get a write lock for this folder. Lock *write_lock = IxManager_Make_Write_Lock(ivars->manager); Lock_Clear_Stale(write_lock); if (Lock_Obtain(write_lock)) { // Only assign if successful, otherwise DESTROY unlocks -- bad! ivars->write_lock = write_lock; } else { DECREF(write_lock); DECREF(self); RETHROW(INCREF(Err_get_error())); } // Find the latest snapshot or create a new one. String *latest_snapfile = IxFileNames_latest_snapshot(folder); if (latest_snapfile) { Snapshot_Read_File(latest_snapshot, folder, latest_snapfile); } // Look for an existing Schema if one wasn't supplied. if (schema) { ivars->schema = (Schema*)INCREF(schema); } else { if (!latest_snapfile) { S_release_write_lock(self); THROW(ERR, "No Schema supplied, and can't find one in the index"); } else { String *schema_file = S_find_schema_file(latest_snapshot); Obj *dump = Json_slurp_json(folder, schema_file); if (dump) { // read file successfully ivars->schema = (Schema*)CERTIFY(Freezer_load(dump), SCHEMA); schema = ivars->schema; DECREF(dump); schema_file = NULL; } else { THROW(ERR, "Failed to parse %o", schema_file); } } } // If we're clobbering, start with an empty Snapshot and an empty // PolyReader. Otherwise, start with the most recent Snapshot and an // up-to-date PolyReader. if (truncate) { ivars->snapshot = Snapshot_new(); ivars->polyreader = PolyReader_new(schema, folder, NULL, NULL, NULL); ivars->truncate = true; } else { // TODO: clone most recent snapshot rather than read it twice. ivars->snapshot = (Snapshot*)INCREF(latest_snapshot); ivars->polyreader = latest_snapfile ? PolyReader_open((Obj*)folder, NULL, NULL) : PolyReader_new(schema, folder, NULL, NULL, NULL); if (latest_snapfile) { // Make sure than any existing fields which may have been // dynamically added during past indexing sessions get added. Schema *old_schema = PolyReader_Get_Schema(ivars->polyreader); Schema_Eat(schema, old_schema); } } // Zap detritus from previous sessions. // Note: we have to feed FilePurger with the most recent snapshot file // now, but with the Indexer's snapshot later. FilePurger *file_purger = FilePurger_new(folder, latest_snapshot, ivars->manager); FilePurger_Purge(file_purger); DECREF(file_purger); // Create a new segment. int64_t new_seg_num = IxManager_Highest_Seg_Num(ivars->manager, latest_snapshot) + 1; Lock *merge_lock = IxManager_Make_Merge_Lock(ivars->manager); if (Lock_Is_Locked(merge_lock)) { // If there's a background merge process going on, stay out of its // way. Hash *merge_data = IxManager_Read_Merge_Data(ivars->manager); Obj *cutoff_obj = merge_data ? Hash_Fetch_Utf8(merge_data, "cutoff", 6) : NULL; if (!cutoff_obj) { DECREF(merge_lock); DECREF(merge_data); THROW(ERR, "Background merge detected, but can't read merge data"); } else { int64_t cutoff = Json_obj_to_i64(cutoff_obj); if (cutoff >= new_seg_num) { new_seg_num = cutoff + 1; } } DECREF(merge_data); } ivars->segment = Seg_new(new_seg_num); // Add all known fields to Segment. Vector *fields = Schema_All_Fields(schema); for (size_t i = 0, max = Vec_Get_Size(fields); i < max; i++) { Seg_Add_Field(ivars->segment, (String*)Vec_Fetch(fields, i)); } DECREF(fields); DECREF(merge_lock); // Create new SegWriter and FilePurger. ivars->file_purger = FilePurger_new(folder, ivars->snapshot, ivars->manager); ivars->seg_writer = SegWriter_new(ivars->schema, ivars->snapshot, ivars->segment, ivars->polyreader); SegWriter_Prep_Seg_Dir(ivars->seg_writer); // Grab a local ref to the DeletionsWriter. ivars->del_writer = (DeletionsWriter*)INCREF( SegWriter_Get_Del_Writer(ivars->seg_writer)); DECREF(latest_snapfile); DECREF(latest_snapshot); return self; }