// Exercise field registration on a Segment: numbering starts at 1,
// re-adding a known field hands back its existing number, and the
// name<->number lookups agree (with NULL/0 for unknown entries).
static void
test_fields(TestBatch *batch) {
    Segment *segment = Seg_new(1);
    ZombieCharBuf *foo = ZCB_WRAP_STR("foo", 3);
    ZombieCharBuf *bar = ZCB_WRAP_STR("bar", 3);
    ZombieCharBuf *baz = ZCB_WRAP_STR("baz", 3);

    int32_t num = Seg_Add_Field(segment, (CharBuf*)foo);
    TEST_TRUE(batch, num == 1,
              "Add_Field returns field number, and field numbers start at 1");
    num = Seg_Add_Field(segment, (CharBuf*)bar);
    TEST_TRUE(batch, num == 2, "add a second field");
    num = Seg_Add_Field(segment, (CharBuf*)foo);
    TEST_TRUE(batch, num == 1,
              "Add_Field returns existing field number if field is already known");

    TEST_TRUE(batch, ZCB_Equals(bar, (Obj*)Seg_Field_Name(segment, 2)),
              "Field_Name");
    TEST_TRUE(batch, Seg_Field_Name(segment, 3) == NULL,
              "Field_Name returns NULL for unknown field number");
    TEST_TRUE(batch, Seg_Field_Num(segment, (CharBuf*)bar) == 2,
              "Field_Num");
    TEST_TRUE(batch, Seg_Field_Num(segment, (CharBuf*)baz) == 0,
              "Field_Num returns 0 for unknown field name");

    DECREF(segment);
}
// Round-trip a Segment through Write_File/Read_File and confirm that the
// doc count, field-name mappings, and stored metadata all survive.
static void
test_Write_File_and_Read_File(TestBatchRunner *runner) {
    RAMFolder *folder  = RAMFolder_new(NULL);
    Segment   *segment = Seg_new(100);
    Segment   *dupe    = Seg_new(100);
    String *flotsam = (String*)SSTR_WRAP_UTF8("flotsam", 7);
    String *jetsam  = (String*)SSTR_WRAP_UTF8("jetsam", 6);

    // Populate the source segment.
    Seg_Set_Count(segment, 111);
    Seg_Store_Metadata_Utf8(segment, "foo", 3, (Obj*)Str_newf("bar"));
    Seg_Add_Field(segment, flotsam);
    Seg_Add_Field(segment, jetsam);

    // Serialize, then deserialize into a fresh Segment.
    RAMFolder_MkDir(folder, Seg_Get_Name(segment));
    Seg_Write_File(segment, (Folder*)folder);
    Seg_Read_File(dupe, (Folder*)folder);

    TEST_TRUE(runner, Seg_Get_Count(dupe) == Seg_Get_Count(segment),
              "Round-trip count through file");
    TEST_TRUE(runner,
              Seg_Field_Num(dupe, jetsam) == Seg_Field_Num(segment, jetsam),
              "Round trip field names through file");
    String *meta = (String*)Seg_Fetch_Metadata_Utf8(dupe, "foo", 3);
    TEST_TRUE(runner,
              meta && Str_Is_A(meta, STRING) && Str_Equals_Utf8(meta, "bar", 3),
              "Round trip metadata through file");

    DECREF(dupe);
    DECREF(segment);
    DECREF(folder);
}
// Write a Segment to a RAMFolder, read it back into a second Segment,
// and verify that count, field mappings, and metadata all round-trip.
static void
test_Write_File_and_Read_File(TestBatch *batch) {
    RAMFolder *folder  = RAMFolder_new(NULL);
    Segment   *segment = Seg_new(100);
    Segment   *dupe    = Seg_new(100);
    CharBuf   *meta;
    CharBuf   *flotsam = (CharBuf*)ZCB_WRAP_STR("flotsam", 7);
    CharBuf   *jetsam  = (CharBuf*)ZCB_WRAP_STR("jetsam", 6);

    // Populate the source segment.
    Seg_Set_Count(segment, 111);
    Seg_Store_Metadata_Str(segment, "foo", 3, (Obj*)CB_newf("bar"));
    Seg_Add_Field(segment, flotsam);
    Seg_Add_Field(segment, jetsam);

    // Serialize, then slurp into the fresh Segment.
    RAMFolder_MkDir(folder, Seg_Get_Name(segment));
    Seg_Write_File(segment, (Folder*)folder);
    Seg_Read_File(dupe, (Folder*)folder);

    TEST_TRUE(batch, Seg_Get_Count(dupe) == Seg_Get_Count(segment),
              "Round-trip count through file");
    TEST_TRUE(batch,
              Seg_Field_Num(dupe, jetsam) == Seg_Field_Num(segment, jetsam),
              "Round trip field names through file");
    meta = (CharBuf*)Seg_Fetch_Metadata_Str(dupe, "foo", 3);
    TEST_TRUE(batch,
              meta && CB_Is_A(meta, CHARBUF) && CB_Equals_Str(meta, "bar", 3),
              "Round trip metadata through file");

    DECREF(dupe);
    DECREF(segment);
    DECREF(folder);
}
// Exercise field registration via the modern String API: numbers start
// at 1, duplicates return their existing number, and name<->number
// lookups round-trip (NULL/0 for unknown entries).
static void
test_fields(TestBatchRunner *runner) {
    Segment *segment = Seg_new(1);
    StackString *foo = SSTR_WRAP_UTF8("foo", 3);
    StackString *bar = SSTR_WRAP_UTF8("bar", 3);
    StackString *baz = SSTR_WRAP_UTF8("baz", 3);

    int32_t num = Seg_Add_Field(segment, (String*)foo);
    TEST_TRUE(runner, num == 1,
              "Add_Field returns field number, and field numbers start at 1");
    num = Seg_Add_Field(segment, (String*)bar);
    TEST_TRUE(runner, num == 2, "add a second field");
    num = Seg_Add_Field(segment, (String*)foo);
    TEST_TRUE(runner, num == 1,
              "Add_Field returns existing field number if field is already known");

    TEST_TRUE(runner, SStr_Equals(bar, (Obj*)Seg_Field_Name(segment, 2)),
              "Field_Name");
    TEST_TRUE(runner, Seg_Field_Name(segment, 3) == NULL,
              "Field_Name returns NULL for unknown field number");
    TEST_TRUE(runner, Seg_Field_Num(segment, (String*)bar) == 2,
              "Field_Num");
    TEST_TRUE(runner, Seg_Field_Num(segment, (String*)baz) == 0,
              "Field_Num returns 0 for unknown field name");

    DECREF(segment);
}
// Look up (or lazily create) the pooled InverterEntry for `field`.
// Throws via ERR if the field is known to neither the Segment nor the
// Schema (i.e. the user never spec'd it).
static InverterEntry*
S_fetch_entry(InverterIVARS *ivars, CharBuf *field) {
    Schema *const schema = ivars->schema;
    int32_t field_num = Seg_Field_Num(ivars->segment, field);

    if (!field_num) {
        // The Segment hasn't seen this field yet.  If the Schema knows
        // it, register it with the Segment to obtain a field number;
        // otherwise we've truly failed to find the field.
        if (!Schema_Fetch_Type(schema, field)) {
            THROW(ERR, "Unknown field name: '%o'", field);
        }
        field_num = Seg_Add_Field(ivars->segment, field);
    }

    // Reuse a pooled entry when one exists; otherwise create and cache it.
    InverterEntry *entry
        = (InverterEntry*)VA_Fetch(ivars->entry_pool, field_num);
    if (entry) { return entry; }
    entry = InvEntry_new(schema, (CharBuf*)field, field_num);
    VA_Store(ivars->entry_pool, field_num, (Obj*)entry);
    return entry;
}
// Absorb another index's fields and segments into the one being written.
// `index` may be a Folder or a CharBuf path; anything else is an error.
void
Indexer_add_index(Indexer *self, Obj *index) {
    Folder *other_folder = NULL;
    IndexReader *reader  = NULL;

    // Coerce `index` into a Folder.
    if (Obj_Is_A(index, FOLDER)) {
        other_folder = (Folder*)INCREF(index);
    }
    else if (Obj_Is_A(index, CHARBUF)) {
        other_folder = (Folder*)FSFolder_new((CharBuf*)index);
    }
    else {
        THROW(ERR, "Invalid type for 'index': %o", Obj_Get_Class_Name(index));
    }

    reader = IxReader_open((Obj*)other_folder, NULL, NULL);
    if (reader == NULL) {
        THROW(ERR, "Index doesn't seem to contain any data");
    }
    else {
        Schema *schema       = self->schema;
        Schema *other_schema = IxReader_Get_Schema(reader);
        VArray *other_fields = Schema_All_Fields(other_schema);
        VArray *seg_readers  = IxReader_Seg_Readers(reader);
        uint32_t i, max;

        // Validate schema compatibility and absorb the incoming schema.
        Schema_Eat(schema, other_schema);

        // Register every incoming field with our Segment.
        for (i = 0, max = VA_Get_Size(other_fields); i < max; i++) {
            Seg_Add_Field(self->segment,
                          (CharBuf*)VA_Fetch(other_fields, i));
        }
        DECREF(other_fields);

        // Add each source segment, remapping doc nums around deletions.
        for (i = 0, max = VA_Get_Size(seg_readers); i < max; i++) {
            SegReader *seg_reader = (SegReader*)VA_Fetch(seg_readers, i);
            DeletionsReader *del_reader
                = (DeletionsReader*)SegReader_Fetch(
                      seg_reader, VTable_Get_Name(DELETIONSREADER));
            Matcher *deletions
                = del_reader ? DelReader_Iterator(del_reader) : NULL;
            I32Array *doc_map = DelWriter_Generate_Doc_Map(
                self->del_writer, deletions,
                SegReader_Doc_Max(seg_reader),
                (int32_t)Seg_Get_Count(self->segment));
            SegWriter_Add_Segment(self->seg_writer, seg_reader, doc_map);
            DECREF(deletions);
            DECREF(doc_map);
        }
        DECREF(seg_readers);
    }
    DECREF(reader);
    DECREF(other_folder);
}
// Absorb another index's fields and segments into the one being written.
// `index` may be a Folder or a String path; anything else is an error.
void
Indexer_Add_Index_IMP(Indexer *self, Obj *index) {
    IndexerIVARS *const ivars = Indexer_IVARS(self);
    Folder *other_folder = NULL;
    IndexReader *reader  = NULL;

    // Coerce `index` into a Folder.
    if (Obj_is_a(index, FOLDER)) {
        other_folder = (Folder*)INCREF(index);
    }
    else if (Obj_is_a(index, STRING)) {
        other_folder = (Folder*)FSFolder_new((String*)index);
    }
    else {
        THROW(ERR, "Invalid type for 'index': %o", Obj_get_class_name(index));
    }

    reader = IxReader_open((Obj*)other_folder, NULL, NULL);
    if (reader == NULL) {
        THROW(ERR, "Index doesn't seem to contain any data");
    }
    else {
        Schema *schema       = ivars->schema;
        Schema *other_schema = IxReader_Get_Schema(reader);
        Vector *other_fields = Schema_All_Fields(other_schema);
        Vector *seg_readers  = IxReader_Seg_Readers(reader);

        // Validate schema compatibility and absorb the incoming schema.
        Schema_Eat(schema, other_schema);

        // Register every incoming field with our Segment.
        for (size_t i = 0, max = Vec_Get_Size(other_fields); i < max; i++) {
            Seg_Add_Field(ivars->segment,
                          (String*)Vec_Fetch(other_fields, i));
        }
        DECREF(other_fields);

        // Add each source segment, remapping doc nums around deletions.
        for (size_t i = 0, max = Vec_Get_Size(seg_readers); i < max; i++) {
            SegReader *seg_reader = (SegReader*)Vec_Fetch(seg_readers, i);
            DeletionsReader *del_reader
                = (DeletionsReader*)SegReader_Fetch(
                      seg_reader, Class_Get_Name(DELETIONSREADER));
            Matcher *deletions
                = del_reader ? DelReader_Iterator(del_reader) : NULL;
            I32Array *doc_map = DelWriter_Generate_Doc_Map(
                ivars->del_writer, deletions,
                SegReader_Doc_Max(seg_reader),
                (int32_t)Seg_Get_Count(ivars->segment));
            SegWriter_Add_Segment(ivars->seg_writer, seg_reader, doc_map);
            DECREF(deletions);
            DECREF(doc_map);
        }
        DECREF(seg_readers);
    }
    DECREF(reader);
    DECREF(other_folder);
}
// Load segment metadata from "<name>/segmeta.json" within `folder`.
// Returns false if the file can't be read; throws on malformed content.
bool
Seg_Read_File_IMP(Segment *self, Folder *folder) {
    SegmentIVARS *const ivars = Seg_IVARS(self);

    String *filename = Str_newf("%o/segmeta.json", ivars->name);
    Hash *metadata = (Hash*)Json_slurp_json(folder, filename);
    DECREF(filename);

    // Bail unless the segmeta file was read successfully.
    if (!metadata) { return false; }
    CERTIFY(metadata, HASH);

    // Adopt the parsed metadata wholesale, then pull out the "segmeta"
    // sub-hash describing the Segment object itself.
    DECREF(ivars->metadata);
    ivars->metadata = metadata;
    Hash *my_metadata = (Hash*)CERTIFY(
        Hash_Fetch_Utf8(ivars->metadata, "segmeta", 7), HASH);

    // Recover the doc count ("doc_count" is the fallback key).
    Obj *count = Hash_Fetch_Utf8(my_metadata, "count", 5);
    if (!count) {
        count = Hash_Fetch_Utf8(my_metadata, "doc_count", 9);
    }
    if (!count) { THROW(ERR, "Missing 'count'"); }
    else        { ivars->count = Json_obj_to_i64(count); }

    // The field number -> name mapping is mandatory.
    Vector *source_by_num
        = (Vector*)Hash_Fetch_Utf8(my_metadata, "field_names", 11);
    if (source_by_num == NULL) {
        THROW(ERR, "Failed to extract 'field_names' from metadata");
    }
    size_t num_fields = Vec_Get_Size(source_by_num);

    // Rebuild both field lookup structures from scratch.
    DECREF(ivars->by_num);
    DECREF(ivars->by_name);
    ivars->by_num  = Vec_new(num_fields);
    ivars->by_name = Hash_new(num_fields);
    for (size_t i = 0; i < num_fields; i++) {
        Seg_Add_Field(self, (String*)Vec_Fetch(source_by_num, i));
    }

    return true;
}
// Load segment metadata from "<name>/segmeta.json" within `folder`.
// Returns false if the file can't be read; throws on malformed content.
bool_t
Seg_read_file(Segment *self, Folder *folder) {
    CharBuf *filename = CB_newf("%o/segmeta.json", self->name);
    Hash *metadata = (Hash*)Json_slurp_json(folder, filename);
    DECREF(filename);

    // Bail unless the segmeta file was read successfully.
    if (!metadata) { return false; }
    CERTIFY(metadata, HASH);

    // Adopt the parsed metadata wholesale, then pull out the "segmeta"
    // sub-hash describing the Segment object itself.
    DECREF(self->metadata);
    self->metadata = metadata;
    Hash *my_metadata = (Hash*)CERTIFY(
        Hash_Fetch_Str(self->metadata, "segmeta", 7), HASH);

    // Recover the doc count ("doc_count" is the fallback key).
    Obj *count = Hash_Fetch_Str(my_metadata, "count", 5);
    if (!count) {
        count = Hash_Fetch_Str(my_metadata, "doc_count", 9);
    }
    if (!count) { THROW(ERR, "Missing 'count'"); }
    else        { self->count = Obj_To_I64(count); }

    // The field number -> name mapping is mandatory.
    VArray *source_by_num
        = (VArray*)Hash_Fetch_Str(my_metadata, "field_names", 11);
    if (source_by_num == NULL) {
        THROW(ERR, "Failed to extract 'field_names' from metadata");
    }
    uint32_t num_fields = VA_Get_Size(source_by_num);

    // Rebuild both field lookup structures from scratch.
    DECREF(self->by_num);
    DECREF(self->by_name);
    self->by_num  = VA_new(num_fields);
    self->by_name = Hash_new(num_fields);
    for (uint32_t i = 0; i < num_fields; i++) {
        Seg_Add_Field(self, (CharBuf*)VA_Fetch(source_by_num, i));
    }

    return true;
}
/* Initialize an Indexer: open the target folder, acquire the write lock,
 * resolve the Schema (supplied by the caller or recovered from the index),
 * set up the Snapshot/PolyReader pair, purge stale files, and create the
 * new Segment plus its SegWriter.  On lock failure the half-built object
 * is destroyed and the pending error is rethrown.
 */
Indexer*
Indexer_init(Indexer *self, Schema *schema, Obj *index,
             IndexManager *manager, int32_t flags) {
    bool_t create   = (flags & Indexer_CREATE) ? true : false;
    bool_t truncate = (flags & Indexer_TRUNCATE) ? true : false;
    Folder *folder = S_init_folder(index, create);
    Lock *write_lock;
    CharBuf *latest_snapfile;
    Snapshot *latest_snapshot = Snapshot_new();

    // Init.
    self->stock_doc = Doc_new(NULL, 0);
    self->truncate = false;
    self->optimize = false;
    self->prepared = false;
    self->needs_commit = false;
    self->snapfile = NULL;
    self->merge_lock = NULL;

    // Assign.
    self->folder = folder;
    self->manager = manager
                    ? (IndexManager*)INCREF(manager)
                    : IxManager_new(NULL, NULL);
    IxManager_Set_Folder(self->manager, folder);

    // Get a write lock for this folder.
    write_lock = IxManager_Make_Write_Lock(self->manager);
    Lock_Clear_Stale(write_lock);
    if (Lock_Obtain(write_lock)) {
        // Only assign if successful, otherwise DESTROY unlocks -- bad!
        self->write_lock = write_lock;
    }
    else {
        DECREF(write_lock);
        DECREF(self);
        RETHROW(INCREF(Err_get_error()));
    }

    // Find the latest snapshot or create a new one.
    latest_snapfile = IxFileNames_latest_snapshot(folder);
    if (latest_snapfile) {
        Snapshot_Read_File(latest_snapshot, folder, latest_snapfile);
    }

    // Look for an existing Schema if one wasn't supplied.
    if (schema) {
        self->schema = (Schema*)INCREF(schema);
    }
    else {
        if (!latest_snapfile) {
            THROW(ERR, "No Schema supplied, and can't find one in the index");
        }
        else {
            // Deserialize the Schema stored alongside the snapshot.
            CharBuf *schema_file = S_find_schema_file(latest_snapshot);
            Hash *dump = (Hash*)Json_slurp_json(folder, schema_file);
            if (dump) { // read file successfully
                self->schema = (Schema*)CERTIFY(
                                   VTable_Load_Obj(SCHEMA, (Obj*)dump),
                                   SCHEMA);
                schema = self->schema;
                DECREF(dump);
                schema_file = NULL;
            }
            else {
                THROW(ERR, "Failed to parse %o", schema_file);
            }
        }
    }

    // If we're clobbering, start with an empty Snapshot and an empty
    // PolyReader.  Otherwise, start with the most recent Snapshot and an
    // up-to-date PolyReader.
    if (truncate) {
        self->snapshot = Snapshot_new();
        self->polyreader = PolyReader_new(schema, folder, NULL, NULL, NULL);
        self->truncate = true;
    }
    else {
        // TODO: clone most recent snapshot rather than read it twice.
        self->snapshot = (Snapshot*)INCREF(latest_snapshot);
        self->polyreader = latest_snapfile
                           ? PolyReader_open((Obj*)folder, NULL, NULL)
                           : PolyReader_new(schema, folder, NULL, NULL, NULL);
        if (latest_snapfile) {
            // Make sure that any existing fields which may have been
            // dynamically added during past indexing sessions get added.
            Schema *old_schema = PolyReader_Get_Schema(self->polyreader);
            Schema_Eat(schema, old_schema);
        }
    }

    // Zap detritus from previous sessions.
    {
        // Note: we have to feed FilePurger with the most recent snapshot file
        // now, but with the Indexer's snapshot later.
        FilePurger *file_purger
            = FilePurger_new(folder, latest_snapshot, self->manager);
        FilePurger_Purge(file_purger);
        DECREF(file_purger);
    }

    // Create a new segment.
    {
        int64_t new_seg_num
            = IxManager_Highest_Seg_Num(self->manager, latest_snapshot) + 1;
        Lock *merge_lock = IxManager_Make_Merge_Lock(self->manager);
        uint32_t i, max;

        if (Lock_Is_Locked(merge_lock)) {
            // If there's a background merge process going on, stay out of its
            // way.
            Hash *merge_data = IxManager_Read_Merge_Data(self->manager);
            Obj *cutoff_obj = merge_data
                              ? Hash_Fetch_Str(merge_data, "cutoff", 6)
                              : NULL;
            if (!cutoff_obj) {
                DECREF(merge_lock);
                DECREF(merge_data);
                THROW(ERR, "Background merge detected, but can't read merge data");
            }
            else {
                // Skip past the segment numbers the merger has claimed.
                int64_t cutoff = Obj_To_I64(cutoff_obj);
                if (cutoff >= new_seg_num) {
                    new_seg_num = cutoff + 1;
                }
            }
            DECREF(merge_data);
        }

        self->segment = Seg_new(new_seg_num);

        // Add all known fields to Segment.
        {
            VArray *fields = Schema_All_Fields(schema);
            for (i = 0, max = VA_Get_Size(fields); i < max; i++) {
                Seg_Add_Field(self->segment, (CharBuf*)VA_Fetch(fields, i));
            }
            DECREF(fields);
        }

        DECREF(merge_lock);
    }

    // Create new SegWriter and FilePurger.
    self->file_purger = FilePurger_new(folder, self->snapshot, self->manager);
    self->seg_writer = SegWriter_new(self->schema, self->snapshot,
                                     self->segment, self->polyreader);
    SegWriter_Prep_Seg_Dir(self->seg_writer);

    // Grab a local ref to the DeletionsWriter.
    self->del_writer = (DeletionsWriter*)INCREF(
                           SegWriter_Get_Del_Writer(self->seg_writer));

    DECREF(latest_snapfile);
    DECREF(latest_snapshot);
    return self;
}
/* Initialize a BackgroundMerger: acquire write and merge locks, read the
 * latest snapshot (bailing early when the index has no content), purge
 * leftovers from prior sessions, open a PolyReader under a read lock,
 * clone its Schema, create the new target Segment, record the merge
 * "cutoff", and build the SegWriter.  The write lock is released before
 * returning so foreground Indexers can proceed while the merge runs.
 */
BackgroundMerger*
BGMerger_init(BackgroundMerger *self, Obj *index, IndexManager *manager) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);
    Folder *folder = S_init_folder(index);

    // Init.
    ivars->optimize     = false;
    ivars->prepared     = false;
    ivars->needs_commit = false;
    ivars->snapfile     = NULL;
    ivars->doc_maps     = Hash_new(0);

    // Assign.
    ivars->folder = folder;
    if (manager) {
        ivars->manager = (IndexManager*)INCREF(manager);
    }
    else {
        ivars->manager = IxManager_new(NULL, NULL);
        IxManager_Set_Write_Lock_Timeout(ivars->manager, 10000);
    }
    IxManager_Set_Folder(ivars->manager, folder);

    // Obtain write lock (which we'll only hold briefly), then merge lock.
    S_obtain_write_lock(self);
    if (!ivars->write_lock) {
        DECREF(self);
        RETHROW(INCREF(Err_get_error()));
    }
    S_obtain_merge_lock(self);
    if (!ivars->merge_lock) {
        DECREF(self);
        RETHROW(INCREF(Err_get_error()));
    }

    // Find the latest snapshot.  If there's no index content, bail early.
    ivars->snapshot = Snapshot_Read_File(Snapshot_new(), folder, NULL);
    if (!Snapshot_Get_Path(ivars->snapshot)) {
        S_release_write_lock(self);
        S_release_merge_lock(self);
        return self;
    }

    // Create FilePurger.  Zap detritus from previous sessions.
    ivars->file_purger
        = FilePurger_new(folder, ivars->snapshot, ivars->manager);
    FilePurger_Purge(ivars->file_purger);

    // Open a PolyReader, passing in the IndexManager so we get a read lock on
    // the Snapshot's files -- so that Indexers don't zap our files while
    // we're operating in the background.
    ivars->polyreader = PolyReader_open((Obj*)folder, NULL, ivars->manager);

    // Clone the PolyReader's schema.
    Obj *dump = (Obj*)Schema_Dump(PolyReader_Get_Schema(ivars->polyreader));
    ivars->schema = (Schema*)CERTIFY(Freezer_load(dump), SCHEMA);
    DECREF(dump);

    // Create new Segment, seeded with every field the Schema knows about.
    int64_t new_seg_num
        = IxManager_Highest_Seg_Num(ivars->manager, ivars->snapshot) + 1;
    Vector *fields = Schema_All_Fields(ivars->schema);
    ivars->segment = Seg_new(new_seg_num);
    for (uint32_t i = 0, max = Vec_Get_Size(fields); i < max; i++) {
        Seg_Add_Field(ivars->segment, (String*)Vec_Fetch(fields, i));
    }
    DECREF(fields);

    // Our "cutoff" is the segment this BackgroundMerger will write.  Now that
    // we've determined the cutoff, write the merge data file.
    ivars->cutoff = Seg_Get_Number(ivars->segment);
    IxManager_Write_Merge_Data(ivars->manager, ivars->cutoff);

    /* Create the SegWriter but hold off on preparing the new segment
     * directory -- because if we don't need to merge any segments we don't
     * need it.  (We've reserved the dir by plopping down the merge.json
     * file.) */
    ivars->seg_writer = SegWriter_new(ivars->schema, ivars->snapshot,
                                      ivars->segment, ivars->polyreader);

    // Grab a local ref to the DeletionsWriter.
    ivars->del_writer
        = (DeletionsWriter*)INCREF(SegWriter_Get_Del_Writer(ivars->seg_writer));

    // Release the write lock.  Now new Indexers can start while we work in
    // the background.
    S_release_write_lock(self);

    return self;
}