// Absorb every segment of another index into the index being written.
//
// `index` must be either a Folder, which is used directly, or a CharBuf,
// which is handed to FSFolder_new() to obtain a Folder (presumably a
// filesystem path -- confirm against FSFolder).  Any other type raises ERR.
// ERR is also raised when IxReader_open() yields NULL for that folder.
//
// The other index's schema is merged into ours via Schema_Eat(), each of its
// field names is registered with our Segment, and each of its segment
// readers is passed to the SegWriter along with a doc map generated by the
// DeletionsWriter.
void
Indexer_add_index(Indexer *self, Obj *index) {
    Folder      *other_folder = NULL;
    IndexReader *reader       = NULL;

    // Either branch leaves us holding one reference to other_folder, which
    // is released before the function exits.
    if (Obj_Is_A(index, FOLDER)) {
        other_folder = (Folder*)INCREF(index);
    }
    else if (Obj_Is_A(index, CHARBUF)) {
        other_folder = (Folder*)FSFolder_new((CharBuf*)index);
    }
    else {
        THROW(ERR, "Invalid type for 'index': %o", Obj_Get_Class_Name(index));
    }

    reader = IxReader_open((Obj*)other_folder, NULL, NULL);
    if (reader == NULL) {
        // Release the folder before raising: the DECREFs at the bottom of
        // the function are presumably never reached once THROW fires (TODO
        // confirm THROW is non-returning).  Clearing the pointer keeps the
        // trailing DECREF harmless should control ever come back; DECREF is
        // already applied to a possibly-NULL pointer in the loop below.
        DECREF(other_folder);
        other_folder = NULL;
        THROW(ERR, "Index doesn't seem to contain any data");
    }
    else {
        Schema *schema       = self->schema;
        Schema *other_schema = IxReader_Get_Schema(reader);
        VArray *other_fields = Schema_All_Fields(other_schema);
        VArray *seg_readers  = IxReader_Seg_Readers(reader);
        uint32_t i, max;

        // Validate schema compatibility and add fields.
        Schema_Eat(schema, other_schema);

        // Add fields to Segment.
        for (i = 0, max = VA_Get_Size(other_fields); i < max; i++) {
            CharBuf *other_field = (CharBuf*)VA_Fetch(other_fields, i);
            Seg_Add_Field(self->segment, other_field);
        }
        DECREF(other_fields);

        // Add all segments.
        for (i = 0, max = VA_Get_Size(seg_readers); i < max; i++) {
            SegReader *seg_reader = (SegReader*)VA_Fetch(seg_readers, i);
            DeletionsReader *del_reader
                = (DeletionsReader*)SegReader_Fetch(
                      seg_reader, VTable_Get_Name(DELETIONSREADER));
            // A segment without a DeletionsReader contributes no deletions
            // iterator; NULL is passed through to the doc map generator.
            Matcher *deletions = del_reader
                                 ? DelReader_Iterator(del_reader)
                                 : NULL;
            I32Array *doc_map = DelWriter_Generate_Doc_Map(
                                    self->del_writer, deletions,
                                    SegReader_Doc_Max(seg_reader),
                                    (int32_t)Seg_Get_Count(self->segment));

            SegWriter_Add_Segment(self->seg_writer, seg_reader, doc_map);
            DECREF(deletions);
            DECREF(doc_map);
        }
        DECREF(seg_readers);
    }

    DECREF(reader);
    DECREF(other_folder);
}
// Absorb every segment of another index into the index being written.
//
// `index` must be either a Folder, which is used directly, or a String,
// which is handed to FSFolder_new() to obtain a Folder (presumably a
// filesystem path -- confirm against FSFolder).  Any other type raises ERR.
// ERR is also raised when IxReader_open() yields NULL for that folder.
//
// The other index's schema is merged into ours via Schema_Eat(), each of its
// field names is registered with our Segment, and each of its segment
// readers is passed to the SegWriter along with a doc map generated by the
// DeletionsWriter.
void
Indexer_Add_Index_IMP(Indexer *self, Obj *index) {
    IndexerIVARS *const ivars = Indexer_IVARS(self);
    Folder      *other_folder = NULL;
    IndexReader *reader       = NULL;

    // Either branch leaves us holding one reference to other_folder, which
    // is released before the function exits.
    if (Obj_is_a(index, FOLDER)) {
        other_folder = (Folder*)INCREF(index);
    }
    else if (Obj_is_a(index, STRING)) {
        other_folder = (Folder*)FSFolder_new((String*)index);
    }
    else {
        THROW(ERR, "Invalid type for 'index': %o", Obj_get_class_name(index));
    }

    reader = IxReader_open((Obj*)other_folder, NULL, NULL);
    if (reader == NULL) {
        // Release the folder before raising: the DECREFs at the bottom of
        // the function are presumably never reached once THROW fires (TODO
        // confirm THROW is non-returning).  Clearing the pointer keeps the
        // trailing DECREF harmless should control ever come back; DECREF is
        // already applied to a possibly-NULL pointer in the loop below.
        DECREF(other_folder);
        other_folder = NULL;
        THROW(ERR, "Index doesn't seem to contain any data");
    }
    else {
        Schema *schema       = ivars->schema;
        Schema *other_schema = IxReader_Get_Schema(reader);
        Vector *other_fields = Schema_All_Fields(other_schema);
        Vector *seg_readers  = IxReader_Seg_Readers(reader);

        // Validate schema compatibility and add fields.
        Schema_Eat(schema, other_schema);

        // Add fields to Segment.
        for (size_t i = 0, max = Vec_Get_Size(other_fields); i < max; i++) {
            String *other_field = (String*)Vec_Fetch(other_fields, i);
            Seg_Add_Field(ivars->segment, other_field);
        }
        DECREF(other_fields);

        // Add all segments.
        for (size_t i = 0, max = Vec_Get_Size(seg_readers); i < max; i++) {
            SegReader *seg_reader = (SegReader*)Vec_Fetch(seg_readers, i);
            DeletionsReader *del_reader
                = (DeletionsReader*)SegReader_Fetch(
                      seg_reader, Class_Get_Name(DELETIONSREADER));
            // A segment without a DeletionsReader contributes no deletions
            // iterator; NULL is passed through to the doc map generator.
            Matcher *deletions = del_reader
                                 ? DelReader_Iterator(del_reader)
                                 : NULL;
            I32Array *doc_map = DelWriter_Generate_Doc_Map(
                                    ivars->del_writer, deletions,
                                    SegReader_Doc_Max(seg_reader),
                                    (int32_t)Seg_Get_Count(ivars->segment));

            SegWriter_Add_Segment(ivars->seg_writer, seg_reader, doc_map);
            DECREF(deletions);
            DECREF(doc_map);
        }
        DECREF(seg_readers);
    }

    DECREF(reader);
    DECREF(other_folder);
}
// Initialize a DefaultDeletionsWriter on top of DataWriter_init().
//
// Captures the PolyReader's segment readers and offsets, creates an
// IndexSearcher over the PolyReader, and allocates one zeroed `updated`
// flag per segment.  For every segment it then builds a BitVector sized by
// the segment's doc max with a bit set for each doc id produced by that
// segment's deletions iterator, and records the segment's position in
// `name_to_tick` keyed by segment name.
//
// Returns `self`.
DefaultDeletionsWriter*
DefDelWriter_init(DefaultDeletionsWriter *self, Schema *schema,
                  Snapshot *snapshot, Segment *segment,
                  PolyReader *polyreader) {
    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);

    ivars->seg_readers = PolyReader_Seg_Readers(polyreader);
    const uint32_t seg_count = VA_Get_Size(ivars->seg_readers);

    // Everything below is sized by the number of segment readers.
    ivars->seg_starts   = PolyReader_Offsets(polyreader);
    ivars->bit_vecs     = VA_new(seg_count);
    ivars->updated      = (bool*)CALLOCATE(seg_count, sizeof(bool));
    ivars->searcher     = IxSearcher_new((Obj*)polyreader);
    ivars->name_to_tick = Hash_new(seg_count);

    // Materialize a BitVector of deletions for each segment.
    for (uint32_t tick = 0; tick < seg_count; tick++) {
        SegReader *seg_reader
            = (SegReader*)VA_Fetch(ivars->seg_readers, tick);
        BitVector *deleted_docs = BitVec_new(SegReader_Doc_Max(seg_reader));
        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Fetch(
                  seg_reader, Class_Get_Name(DELETIONSREADER));

        // Only segments that expose a DeletionsReader have an iterator.
        Matcher *del_iter = NULL;
        if (del_reader != NULL) {
            del_iter = DelReader_Iterator(del_reader);
        }

        if (del_iter != NULL) {
            // Iteration ends when Matcher_Next() returns 0.
            for (int32_t doc_id = Matcher_Next(del_iter);
                 doc_id != 0;
                 doc_id = Matcher_Next(del_iter)
                ) {
                BitVec_Set(deleted_docs, doc_id);
            }
            DECREF(del_iter);
        }

        VA_Store(ivars->bit_vecs, tick, (Obj*)deleted_docs);
        Hash_Store(ivars->name_to_tick,
                   (Obj*)SegReader_Get_Seg_Name(seg_reader),
                   (Obj*)Int32_new(tick));
    }

    return self;
}
// Carry forward deletions that were applied to segments after this merger
// had already rewritten them.
//
// A fresh PolyReader is opened on the folder and compared, segment by
// segment, with the merger's own PolyReader.  For each segment that has an
// entry in `doc_maps` and whose deletion count differs between the two
// readers, a deletions iterator from the fresh reader is collected.
//
// Returns false when nothing was collected.  Otherwise a new segment is
// written whose DeletionsWriter receives each newly deleted doc, remapped
// through the segment's doc map and shifted by the rewritten segment's
// offset, and the function returns true.  Throws ERR ("Failed sanity
// check") if no segment reader matches the merger's segment number.
static bool
S_merge_updated_deletions(BackgroundMerger *self) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);
    Hash *pending_dels = NULL;

    PolyReader *fresh_polyreader
        = PolyReader_open((Obj*)ivars->folder, NULL, NULL);
    Vector *fresh_seg_readers = PolyReader_Get_Seg_Readers(fresh_polyreader);
    Vector *stale_seg_readers = PolyReader_Get_Seg_Readers(ivars->polyreader);
    Hash   *fresh_by_name     = Hash_new(Vec_Get_Size(fresh_seg_readers));

    // Index the fresh segment readers by segment name.
    const uint32_t fresh_count = Vec_Get_Size(fresh_seg_readers);
    for (uint32_t tick = 0; tick < fresh_count; tick++) {
        SegReader *fresh_reader
            = (SegReader*)Vec_Fetch(fresh_seg_readers, tick);
        String *seg_name = SegReader_Get_Seg_Name(fresh_reader);
        Hash_Store(fresh_by_name, seg_name, INCREF(fresh_reader));
    }

    const uint32_t stale_count = Vec_Get_Size(stale_seg_readers);
    for (uint32_t tick = 0; tick < stale_count; tick++) {
        SegReader *stale_reader
            = (SegReader*)Vec_Fetch(stale_seg_readers, tick);
        String *seg_name = SegReader_Get_Seg_Name(stale_reader);

        // Only segments that were merged away have a doc map.
        if (!Hash_Fetch(ivars->doc_maps, seg_name)) {
            continue;
        }

        SegReader *fresh_reader
            = (SegReader*)CERTIFY(
                  Hash_Fetch(fresh_by_name, seg_name),
                  SEGREADER);
        int32_t stale_del_count = SegReader_Del_Count(stale_reader);
        int32_t fresh_del_count = SegReader_Del_Count(fresh_reader);

        // Matching counts mean no new deletions were applied against it.
        if (stale_del_count == fresh_del_count) {
            continue;
        }

        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Obtain(
                  fresh_reader, Class_Get_Name(DELETIONSREADER));
        if (pending_dels == NULL) {
            // Created lazily, on the first segment with new deletions.
            pending_dels = Hash_new(stale_count);
        }
        Hash_Store(pending_dels, seg_name,
                   (Obj*)DelReader_Iterator(del_reader));
    }

    DECREF(fresh_polyreader);
    DECREF(fresh_by_name);

    if (pending_dels == NULL) {
        return false;
    }

    PolyReader *merge_polyreader
        = PolyReader_open((Obj*)ivars->folder, ivars->snapshot, NULL);
    Vector *merge_seg_readers = PolyReader_Get_Seg_Readers(merge_polyreader);
    Snapshot *latest_snapshot
        = Snapshot_Read_File(Snapshot_new(), ivars->folder, NULL);
    int64_t next_seg_num
        = IxManager_Highest_Seg_Num(ivars->manager, latest_snapshot) + 1;
    Segment   *carry_segment = Seg_new(next_seg_num);
    SegWriter *seg_writer    = SegWriter_new(ivars->schema, ivars->snapshot,
                                             carry_segment, merge_polyreader);
    DeletionsWriter *del_writer = SegWriter_Get_Del_Writer(seg_writer);
    int64_t merge_seg_num = Seg_Get_Number(ivars->segment);

    // INT32_MAX doubles as the "not found" marker for the sanity check.
    int32_t offset = INT32_MAX;

    SegWriter_Prep_Seg_Dir(seg_writer);

    // Locate the rewritten segment and look up its starting offset.  The
    // scan runs to the end; were several readers to match, the last one
    // would determine the offset.
    const uint32_t merge_count = Vec_Get_Size(merge_seg_readers);
    for (uint32_t tick = 0; tick < merge_count; tick++) {
        SegReader *candidate
            = (SegReader*)Vec_Fetch(merge_seg_readers, tick);
        if (SegReader_Get_Seg_Num(candidate) != merge_seg_num) {
            continue;
        }
        I32Array *offsets = PolyReader_Offsets(merge_polyreader);
        offset = I32Arr_Get(offsets, tick);
        DECREF(offsets);
    }
    if (offset == INT32_MAX) {
        THROW(ERR, "Failed sanity check");
    }

    HashIterator *iter = HashIter_new(pending_dels);
    while (HashIter_Next(iter)) {
        String   *seg_name  = HashIter_Get_Key(iter);
        Matcher  *deletions = (Matcher*)HashIter_Get_Value(iter);
        I32Array *doc_map
            = (I32Array*)CERTIFY(
                  Hash_Fetch(ivars->doc_maps, seg_name),
                  I32ARRAY);

        for (int32_t doc_id = Matcher_Next(deletions);
             doc_id != 0;
             doc_id = Matcher_Next(deletions)
            ) {
            // Find the slot where the deleted doc resides in the rewritten
            // segment.  A zero slot means the doc was already deleted when
            // we were merging, so there is nothing to do.
            int32_t remapped = I32Arr_Get(doc_map, doc_id);
            if (remapped == 0) {
                continue;
            }

            // It's a new deletion, so carry it forward and zap it in the
            // rewritten segment.
            DelWriter_Delete_By_Doc_ID(del_writer, remapped + offset);
        }
    }
    DECREF(iter);

    // Finish the segment and clean up.
    DelWriter_Finish(del_writer);
    SegWriter_Finish(seg_writer);
    DECREF(seg_writer);
    DECREF(carry_segment);
    DECREF(latest_snapshot);
    DECREF(merge_polyreader);
    DECREF(pending_dels);

    return true;
}