/* Get the index ready for a commit.
 *
 * Runs S_maybe_merge() when the PolyReader exposes any segment readers, and
 * then -- if there is something new to record -- finishes the segment being
 * written, writes a schema file under the next generation number, and writes
 * the snapshot to a ".temp" file.  The PolyReader is closed and the Indexer
 * is flagged as prepared on the way out.
 *
 * Throws if the Indexer holds no write lock or has already been prepared.
 */
void Indexer_prepare_commit(Indexer *self) {
    VArray   *readers     = PolyReader_Get_Seg_Readers(self->polyreader);
    uint32_t  num_readers = VA_Get_Size(readers);
    bool_t    merged      = false;
    bool_t    must_write;

    if (self->prepared || !self->write_lock) {
        THROW(ERR, "Can't call Prepare_Commit() more than once");
    }

    // Existing segments are only considered for merging when there are any.
    if (num_readers > 0) {
        merged = S_maybe_merge(self, readers);
    }

    // A new segment and snapshot are needed when any of these hold:
    must_write = Seg_Get_Count(self->segment)            // docs/segs added
                 || merged                               // some segs merged
                 || !Snapshot_Num_Entries(self->snapshot) // brand new index
                 || DelWriter_Updated(self->del_writer); // deletions changed

    if (must_write) {
        Folder   *folder       = self->folder;
        Schema   *schema       = self->schema;
        Snapshot *snapshot     = self->snapshot;
        CharBuf  *prior_schema = S_find_schema_file(snapshot);
        CharBuf  *next_schema;
        uint64_t  generation   = 1;
        char      base36[StrHelp_MAX_BASE36_BYTES];

        // The schema generation is one past the previous file's, or 1 when
        // the snapshot lists no schema file yet.
        if (prior_schema) {
            generation = IxFileNames_extract_gen(prior_schema) + 1;
        }
        StrHelp_to_base36(generation, &base36);
        next_schema = CB_newf("schema_%s.json", base36);

        // Close out the segment, then swap the schema entry in the snapshot.
        SegWriter_Finish(self->seg_writer);
        Schema_Write(schema, folder, next_schema);
        if (prior_schema) {
            Snapshot_Delete_Entry(snapshot, prior_schema);
        }
        Snapshot_Add_Entry(snapshot, next_schema);
        DECREF(next_schema);

        // Replace the stored snapshot file name with a fresh ".temp" name,
        // remove any file already at that path, and write the snapshot.
        DECREF(self->snapfile);
        self->snapfile = IxManager_Make_Snapshot_Filename(self->manager);
        CB_Cat_Trusted_Str(self->snapfile, ".temp", 5);
        Folder_Delete(folder, self->snapfile);
        Snapshot_Write_File(snapshot, folder, self->snapfile);

        self->needs_commit = true;
    }

    // The reader is closed so that its files may be deleted if appropriate.
    PolyReader_Close(self->polyreader);
    self->prepared = true;
}
/* Get the index ready for a commit.
 *
 * Runs S_maybe_merge() when the PolyReader exposes any segment readers.  If
 * anything needs recording, a ".temp" snapshot file name is derived, the
 * schema generation is extracted from that name, the segment is finished,
 * the schema file is written and the snapshot is written to the temp file.
 * The PolyReader is closed and the Indexer is flagged as prepared on the way
 * out.
 *
 * Throws if the Indexer holds no write lock or has already been prepared.
 */
void Indexer_Prepare_Commit_IMP(Indexer *self) {
    IndexerIVARS *const ivars = Indexer_IVARS(self);
    Vector *readers      = PolyReader_Get_Seg_Readers(ivars->polyreader);
    size_t  reader_count = Vec_Get_Size(readers);

    if (ivars->prepared || !ivars->write_lock) {
        THROW(ERR, "Can't call Prepare_Commit() more than once");
    }

    // Existing segments are only considered for merging when there are any.
    bool merged = false;
    if (reader_count != 0) {
        merged = S_maybe_merge(self, readers);
    }

    // A new segment and snapshot are needed when any of these hold:
    const bool must_write
        = Seg_Get_Count(ivars->segment)             // docs/segs added
          || merged                                 // some segs merged
          || !Snapshot_Num_Entries(ivars->snapshot) // brand new index
          || DelWriter_Updated(ivars->del_writer);  // deletions changed

    if (must_write) {
        Folder   *folder   = ivars->folder;
        Schema   *schema   = ivars->schema;
        Snapshot *snapshot = ivars->snapshot;

        // Build the temporary snapshot file name: "<snapshot name>.temp".
        DECREF(ivars->snapfile);
        String *base_name = IxManager_Make_Snapshot_Filename(ivars->manager);
        ivars->snapfile = Str_Cat_Trusted_Utf8(base_name, ".temp", 5);
        DECREF(base_name);

        // The schema file takes its generation from the snapshot file name.
        uint64_t generation = IxFileNames_extract_gen(ivars->snapfile);
        char base36[StrHelp_MAX_BASE36_BYTES];
        StrHelp_to_base36(generation, &base36);
        String *next_schema = Str_newf("schema_%s.json", base36);

        // Close out the segment, then swap the schema entry in the snapshot.
        SegWriter_Finish(ivars->seg_writer);
        Schema_Write(schema, folder, next_schema);
        String *prior_schema = S_find_schema_file(snapshot);
        if (prior_schema) {
            Snapshot_Delete_Entry(snapshot, prior_schema);
        }
        Snapshot_Add_Entry(snapshot, next_schema);
        DECREF(next_schema);

        // Remove any file already at the temp path and write the snapshot.
        Folder_Delete(folder, ivars->snapfile);
        Snapshot_Write_File(snapshot, folder, ivars->snapfile);

        ivars->needs_commit = true;
    }

    // The reader is closed so that its files may be deleted if appropriate.
    PolyReader_Close(ivars->polyreader);
    ivars->prepared = true;
}
/* Merge the segments chosen by IxManager_Recycle() into the segment that is
 * currently being written.
 *
 * self        - the Indexer whose manager, polyreader, del_writer, segment
 *               and seg_writer are used.
 * seg_readers - the current segment readers; only the element count is
 *               consulted here, to decide whether deletions still need to
 *               be written out.
 *
 * Returns true if at least one segment was handed to
 * SegWriter_Merge_Segment(), false otherwise.
 *
 * Throws if Recycle() lists the same segment twice, or if a listed segment's
 * number is not greater than the merge cutoff.
 */
static bool_t S_maybe_merge(Indexer *self, VArray *seg_readers) {
    bool_t    merge_happened  = false;
    uint32_t  num_seg_readers = VA_Get_Size(seg_readers);
    Lock     *merge_lock      = IxManager_Make_Merge_Lock(self->manager);
    bool_t    got_merge_lock  = Lock_Obtain(merge_lock);
    int64_t   cutoff;
    VArray   *to_merge;
    uint32_t  i, max;

    if (got_merge_lock) {
        // We own the merge lock: keep it on the Indexer and apply no cutoff.
        self->merge_lock = merge_lock;
        cutoff = 0;
    }
    else {
        // If something else holds the merge lock, don't interfere.  Use the
        // "cutoff" recorded in the merge data when present; otherwise fall
        // back to I64_MAX.
        Hash *merge_data = IxManager_Read_Merge_Data(self->manager);
        if (merge_data) {
            Obj *cutoff_obj = Hash_Fetch_Str(merge_data, "cutoff", 6);
            if (cutoff_obj) {
                cutoff = Obj_To_I64(cutoff_obj);
            }
            else {
                cutoff = I64_MAX;
            }
            DECREF(merge_data);
        }
        else {
            cutoff = I64_MAX;
        }
        DECREF(merge_lock);
    }

    // Get a list of segments to recycle.  Validate and confirm that there are
    // no dupes in the list.
    to_merge = IxManager_Recycle(self->manager, self->polyreader,
                                 self->del_writer, cutoff, self->optimize);
    {
        Hash *seen = Hash_new(VA_Get_Size(to_merge));
        for (i = 0, max = VA_Get_Size(to_merge); i < max; i++) {
            SegReader *seg_reader
                = (SegReader*)CERTIFY(VA_Fetch(to_merge, i), SEGREADER);
            CharBuf *seg_name = SegReader_Get_Seg_Name(seg_reader);
            if (Hash_Fetch(seen, (Obj*)seg_name)) {
                DECREF(seen);
                DECREF(to_merge);
                THROW(ERR, "Recycle() tried to merge segment '%o' twice",
                      seg_name);
            }
            Hash_Store(seen, (Obj*)seg_name, INCREF(&EMPTY));
        }
        DECREF(seen);
    }

    // Consolidate segments if either sparse or optimizing forced.
    for (i = 0, max = VA_Get_Size(to_merge); i < max; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(to_merge, i);
        int64_t    seg_num    = SegReader_Get_Seg_Num(seg_reader);
        Matcher   *deletions
            = DelWriter_Seg_Deletions(self->del_writer, seg_reader);
        I32Array  *doc_map
            = DelWriter_Generate_Doc_Map(self->del_writer, deletions,
                                         SegReader_Doc_Max(seg_reader),
                                         (int32_t)Seg_Get_Count(self->segment));

        if (seg_num <= cutoff) {
            // Release this iteration's temporaries before raising, in line
            // with the cleanup done ahead of the THROW in the dupe check
            // above.  `to_merge` is deliberately left alone here because the
            // message below reads a name borrowed from one of its readers.
            DECREF(deletions);
            DECREF(doc_map);
            THROW(ERR, "Segment %o violates cutoff (%i64 <= %i64)",
                  SegReader_Get_Seg_Name(seg_reader), seg_num, cutoff);
        }

        SegWriter_Merge_Segment(self->seg_writer, seg_reader, doc_map);
        merge_happened = true;
        DECREF(deletions);
        DECREF(doc_map);
    }

    // Write out new deletions.
    if (DelWriter_Updated(self->del_writer)) {
        // Only write out if they haven't all been applied, i.e. when not
        // every current segment was merged.
        if (VA_Get_Size(to_merge) != num_seg_readers) {
            DelWriter_Finish(self->del_writer);
        }
    }

    DECREF(to_merge);
    return merge_happened;
}
/* Get a background merge ready for commit.
 *
 * Runs S_maybe_merge() when the PolyReader exposes any segment readers.  If
 * it reports nothing merged, the merger is flagged as prepared and the
 * function returns with `needs_commit` untouched.  Otherwise pending
 * deletions and the segment are finished, the write lock is obtained, and
 * the snapshot is written to a ".temp" file.  When the newest snapshot in
 * the folder differs from the one this merger's PolyReader was opened on,
 * updated deletions are merged and "seg_" entries newer than the cutoff are
 * copied from that snapshot before the temp file is rewritten.
 *
 * Throws if called more than once, or rethrows the stored error when the
 * write lock is not held after S_obtain_write_lock().
 */
void BGMerger_Prepare_Commit_IMP(BackgroundMerger *self) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);
    Vector   *readers      = PolyReader_Get_Seg_Readers(ivars->polyreader);
    uint32_t  reader_count = Vec_Get_Size(readers);
    uint32_t  merged_count = 0;

    if (ivars->prepared) {
        THROW(ERR, "Can't call Prepare_Commit() more than once");
    }

    // Existing segments are only considered for merging when there are any.
    if (reader_count) {
        merged_count = S_maybe_merge(self);
    }

    if (merged_count == 0) {
        // Nothing merged.  Leave `needs_commit` false and bail out.
        ivars->prepared = true;
        return;
    }

    // From here on: finish the segment and write a new snapshot file.
    Folder   *folder   = ivars->folder;
    Snapshot *snapshot = ivars->snapshot;

    // New deletions are written out only when some segments were left
    // unmerged, i.e. the deletions have not all been applied.
    if (DelWriter_Updated(ivars->del_writer)) {
        if (merged_count != reader_count) {
            DelWriter_Finish(ivars->del_writer);
        }
    }

    SegWriter_Finish(ivars->seg_writer);

    // The write lock is required before the snapshot is touched.
    S_obtain_write_lock(self);
    if (!ivars->write_lock) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Write the temporary snapshot file under "<snapshot name>.temp".
    DECREF(ivars->snapfile);
    String *base_name = IxManager_Make_Snapshot_Filename(ivars->manager);
    ivars->snapfile = Str_Cat_Trusted_Utf8(base_name, ".temp", 5);
    DECREF(base_name);
    Folder_Delete(folder, ivars->snapfile);
    Snapshot_Write_File(snapshot, folder, ivars->snapfile);

    // Compare the snapshot path we started from with the newest one in the
    // folder to find out whether the index changed while we were merging.
    String *start_path
        = Snapshot_Get_Path(PolyReader_Get_Snapshot(ivars->polyreader));
    Snapshot *latest
        = Snapshot_Read_File(Snapshot_new(), ivars->folder, NULL);
    String *latest_path = Snapshot_Get_Path(latest);

    if (!Str_Equals(start_path, (Obj*)latest_path)) {
        // New deletions may have been applied, since this merge started,
        // against segments we just merged away.  If so, another segment has
        // to be written which applies them to the new composite segment.
        S_merge_updated_deletions(self);

        // Add the fresh content to our snapshot.  This has to run AFTER
        // S_merge_updated_deletions, because otherwise we couldn't tell
        // whether the deletion counts changed.
        Vector *entries = Snapshot_List(latest);
        for (uint32_t tick = 0, limit = Vec_Get_Size(entries);
             tick < limit;
             tick++
            ) {
            String *entry = (String*)Vec_Fetch(entries, tick);
            if (!Str_Starts_With_Utf8(entry, "seg_", 4)) {
                continue;
            }
            int64_t gen = (int64_t)IxFileNames_extract_gen(entry);
            if (gen <= ivars->cutoff) {
                continue;
            }
            Snapshot_Add_Entry(ivars->snapshot, entry);
        }
        DECREF(entries);

        // The snapshot content has changed, so the temp file is rewritten.
        Folder_Delete(folder, ivars->snapfile);
        Snapshot_Write_File(snapshot, folder, ivars->snapfile);
    }

    DECREF(latest);
    ivars->needs_commit = true;

    // The reader is closed so that its files may be deleted if appropriate.
    PolyReader_Close(ivars->polyreader);
    ivars->prepared = true;
}