void
Inverter_Invert_Doc_IMP(Inverter *self, Doc *doc) {
    InverterIVARS *const ivars = Inverter_IVARS(self);
    Hash *const fields = (Hash*)Doc_Get_Fields(doc);

    // Prepare for the new doc.
    Inverter_Set_Doc(self, doc);

    // Extract and invert the doc's fields.
    HashIterator *iter = HashIter_new(fields);
    while (HashIter_Next(iter)) {
        String *field = HashIter_Get_Key(iter);
        Obj    *obj   = HashIter_Get_Value(iter);
        InverterEntry *inventry = S_fetch_entry(ivars, field);
        InverterEntryIVARS *inventry_ivars = InvEntry_IVARS(inventry);
        FieldType *type = inventry_ivars->type;

        // Get the field value, validating its type.  Mask off any flag
        // bits so the switch sees only the primitive type id.
        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                CERTIFY(obj, STRING);
                break;
            }
            case FType_BLOB: {
                CERTIFY(obj, BLOB);
                break;
            }
            case FType_INT32:
            case FType_INT64: {
                CERTIFY(obj, INTEGER);
                break;
            }
            case FType_FLOAT32:
            case FType_FLOAT64: {
                CERTIFY(obj, FLOAT);
                break;
            }
            default:
                THROW(ERR, "Unrecognized type: %o", type);
        }

        if (inventry_ivars->value != obj) {
            DECREF(inventry_ivars->value);
            inventry_ivars->value = INCREF(obj);
        }

        Inverter_Add_Field(self, inventry);
    }
    DECREF(iter);
}
Obj*
S_dump_hash(Hash *hash) {
    Hash *dump = Hash_new(Hash_Get_Size(hash));

    HashIterator *iter = HashIter_new(hash);
    while (HashIter_Next(iter)) {
        String *key   = HashIter_Get_Key(iter);
        Obj    *value = HashIter_Get_Value(iter);
        Hash_Store(dump, key, Freezer_dump(value));
    }
    DECREF(iter);

    return (Obj*)dump;
}
void
Freezer_serialize_hash(Hash *hash, OutStream *outstream) {
    uint32_t hash_size = Hash_Get_Size(hash);
    OutStream_Write_C32(outstream, hash_size);

    // Write each key/value pair after the pair count.
    HashIterator *iter = HashIter_new(hash);
    while (HashIter_Next(iter)) {
        String *key = HashIter_Get_Key(iter);
        Obj    *val = HashIter_Get_Value(iter);
        Freezer_serialize_string(key, outstream);
        FREEZE(val, outstream);
    }
    DECREF(iter);
}
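/* For illustration, a minimal sketch of the matching read side.  The
 * helpers S_read_string() and S_thaw_obj() are hypothetical stand-ins for
 * whatever deserialization routines pair with Freezer_serialize_string()
 * and FREEZE(); the sketch simply mirrors the write order above -- a C32
 * pair count, then that many key/value pairs. */
static Hash*
S_deserialize_hash_sketch(InStream *instream) {
    uint32_t size = InStream_Read_C32(instream);
    Hash *hash = Hash_new(size);
    for (uint32_t i = 0; i < size; i++) {
        String *key = S_read_string(instream); // hypothetical helper
        Obj    *val = S_thaw_obj(instream);    // hypothetical helper
        Hash_Store(hash, key, val);            // hash takes ownership of val
        DECREF(key);
    }
    return hash;
}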
void
Schema_Eat_IMP(Schema *self, Schema *other) {
    if (!Schema_is_a(self, Schema_get_class(other))) {
        THROW(ERR, "%o not a descendant of %o",
              Schema_get_class_name(self), Schema_get_class_name(other));
    }

    SchemaIVARS *const ovars = Schema_IVARS(other);

    HashIterator *iter = HashIter_new(ovars->types);
    while (HashIter_Next(iter)) {
        String *field = HashIter_Get_Key(iter);
        FieldType *type = (FieldType*)HashIter_Get_Value(iter);
        Schema_Spec_Field(self, field, type);
    }
    DECREF(iter);
}
Hash*
Schema_Dump_IMP(Schema *self) {
    SchemaIVARS *const ivars = Schema_IVARS(self);
    Hash *dump = Hash_new(0);
    Hash *type_dumps = Hash_new(Hash_Get_Size(ivars->types));

    // Record class name, store dumps of unique Analyzers.
    Hash_Store_Utf8(dump, "_class", 6,
                    (Obj*)Str_Clone(Schema_get_class_name(self)));
    Hash_Store_Utf8(dump, "analyzers", 9,
                    Freezer_dump((Obj*)ivars->uniq_analyzers));

    // Dump FieldTypes.
    Hash_Store_Utf8(dump, "fields", 6, (Obj*)type_dumps);
    HashIterator *iter = HashIter_new(ivars->types);
    while (HashIter_Next(iter)) {
        String    *field      = HashIter_Get_Key(iter);
        FieldType *type       = (FieldType*)HashIter_Get_Value(iter);
        Class     *type_class = FType_get_class(type);

        // Dump known types to simplified format.
        if (type_class == FULLTEXTTYPE) {
            FullTextType *fttype = (FullTextType*)type;
            Hash *type_dump = FullTextType_Dump_For_Schema(fttype);
            Analyzer *analyzer = FullTextType_Get_Analyzer(fttype);
            uint32_t tick
                = S_find_in_array(ivars->uniq_analyzers, (Obj*)analyzer);

            // Store the tick which references a unique analyzer.
            Hash_Store_Utf8(type_dump, "analyzer", 8,
                            (Obj*)Str_newf("%u32", tick));

            Hash_Store(type_dumps, field, (Obj*)type_dump);
        }
        else if (type_class == STRINGTYPE || type_class == BLOBTYPE) {
            Hash *type_dump = FType_Dump_For_Schema(type);
            Hash_Store(type_dumps, field, (Obj*)type_dump);
        }
        // Unknown FieldType type, so punt.
        else {
            Hash_Store(type_dumps, field, FType_Dump(type));
        }
    }
    DECREF(iter);

    return dump;
}
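/* For illustration only: a schema with a single full-text field (field
 * name and analyzer hypothetical) might serialize to JSON along these
 * lines, with the "analyzer" entry holding the tick into the shared
 * "analyzers" array rather than a nested analyzer dump:
 *
 *   {
 *     "_class": "MySchema",
 *     "analyzers": [ { "_class": "Lucy::Analysis::EasyAnalyzer", ... } ],
 *     "fields": {
 *       "title": { "type": "fulltext", "analyzer": "0", ... }
 *     }
 *   }
 */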
void
DefDelWriter_Merge_Segment_IMP(DefaultDeletionsWriter *self,
                               SegReader *reader, I32Array *doc_map) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    UNUSED_VAR(doc_map);
    Segment *segment = SegReader_Get_Segment(reader);
    Hash *del_meta = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "deletions", 9);

    if (del_meta) {
        Vector *seg_readers = ivars->seg_readers;
        Hash *files = (Hash*)Hash_Fetch_Utf8(del_meta, "files", 5);
        if (files) {
            HashIterator *iter = HashIter_new(files);
            while (HashIter_Next(iter)) {
                String *seg       = HashIter_Get_Key(iter);
                Hash   *mini_meta = (Hash*)HashIter_Get_Value(iter);

                /* Find the segment the deletions from the SegReader
                 * we're adding correspond to.  If it's gone, we don't
                 * need to worry about losing deletions files that point
                 * at it. */
                for (size_t i = 0, max = Vec_Get_Size(seg_readers);
                     i < max; i++
                    ) {
                    SegReader *candidate
                        = (SegReader*)Vec_Fetch(seg_readers, i);
                    String *candidate_name
                        = Seg_Get_Name(SegReader_Get_Segment(candidate));

                    if (Str_Equals(seg, (Obj*)candidate_name)) {
                        /* If the count hasn't changed, we're about to
                         * merge away the most recent deletions file
                         * pointing at this target segment -- so force a
                         * new file to be written out. */
                        int32_t count = (int32_t)Json_obj_to_i64(
                                            Hash_Fetch_Utf8(mini_meta,
                                                            "count", 5));
                        DeletionsReader *del_reader
                            = (DeletionsReader*)SegReader_Obtain(
                                  candidate,
                                  Class_Get_Name(DELETIONSREADER));
                        if (count == DelReader_Del_Count(del_reader)) {
                            ivars->updated[i] = true;
                        }
                        break;
                    }
                }
            }
            DECREF(iter);
        }
    }
}
static Obj*
S_load_from_hash(Hash *dump) {
    String *class_name = (String*)Hash_Fetch_Utf8(dump, "_class", 6);

    // Assume that the presence of the "_class" key paired with a valid
    // class name indicates the output of a dump() rather than an ordinary
    // Hash.
    if (class_name && Str_is_a(class_name, STRING)) {
        Class *klass = Class_fetch_class(class_name);

        if (!klass) {
            String *parent_class_name = Class_find_parent_class(class_name);
            if (parent_class_name) {
                Class *parent = Class_singleton(parent_class_name, NULL);
                klass = Class_singleton(class_name, parent);
                DECREF(parent_class_name);
            }
            else {
                // TODO: Fix load() so that it works with ordinary hash
                // keys named "_class".
                THROW(ERR, "Can't find class '%o'", class_name);
            }
        }

        // Dispatch to an alternate Load() method.
        if (klass) {
            return S_load_via_load_method(klass, (Obj*)dump);
        }
    }

    // It's an ordinary Hash.
    Hash *loaded = Hash_new(Hash_Get_Size(dump));
    HashIterator *iter = HashIter_new(dump);
    while (HashIter_Next(iter)) {
        String *key   = HashIter_Get_Key(iter);
        Obj    *value = HashIter_Get_Value(iter);
        Hash_Store(loaded, key, Freezer_load(value));
    }
    DECREF(iter);

    return (Obj*)loaded;
}
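/* A hedged round-trip sketch, assuming a Schema instance named schema: the
 * "_class" entry written by Dump() is what steers the load through the
 * S_load_via_load_method() branch above instead of the plain-Hash branch.
 *
 *   Obj *dump   = (Obj*)Schema_Dump(schema);
 *   Obj *loaded = Freezer_load(dump);   // dispatches to Schema_Load()
 *   DECREF(dump);
 *   DECREF(loaded);
 */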
static void
test_offsets(TestBatchRunner *runner) {
    Folder *folder = S_folder_with_contents();
    CompoundFileWriter *cf_writer = CFWriter_new(folder);
    Hash *cf_metadata;
    Hash *files;

    CFWriter_Consolidate(cf_writer);

    cf_metadata = (Hash*)CERTIFY(
                      Json_slurp_json(folder, cfmeta_file), HASH);
    files = (Hash*)CERTIFY(
                Hash_Fetch_Utf8(cf_metadata, "files", 5), HASH);

    bool offsets_ok = true;

    TEST_TRUE(runner, Hash_Get_Size(files) > 0, "Multiple files");

    HashIterator *iter = HashIter_new(files);
    while (HashIter_Next(iter)) {
        String *file   = HashIter_Get_Key(iter);
        Hash   *stats  = (Hash*)CERTIFY(HashIter_Get_Value(iter), HASH);
        Obj    *offset = CERTIFY(Hash_Fetch_Utf8(stats, "offset", 6), OBJ);
        int64_t offs   = Obj_To_I64(offset);
        if (offs % 8 != 0) {
            offsets_ok = false;
            FAIL(runner, "Offset %" PRId64 " for %s not a multiple of 8",
                 offs, Str_Get_Ptr8(file));
            break;
        }
    }
    DECREF(iter);

    if (offsets_ok) {
        PASS(runner, "All offsets are multiples of 8");
    }

    DECREF(cf_metadata);
    DECREF(cf_writer);
    DECREF(folder);
}
Schema*
Schema_Load_IMP(Schema *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    String *class_name
        = (String*)CERTIFY(Hash_Fetch_Utf8(source, "_class", 6), STRING);
    Class *klass = Class_singleton(class_name, NULL);
    Schema *loaded = (Schema*)Class_Make_Obj(klass);
    Hash *type_dumps
        = (Hash*)CERTIFY(Hash_Fetch_Utf8(source, "fields", 6), HASH);
    Vector *analyzer_dumps
        = (Vector*)CERTIFY(Hash_Fetch_Utf8(source, "analyzers", 9), VECTOR);
    Vector *analyzers = (Vector*)Freezer_load((Obj*)analyzer_dumps);
    UNUSED_VAR(self);

    // Start with a blank Schema.
    Schema_init(loaded);
    SchemaIVARS *const loaded_ivars = Schema_IVARS(loaded);
    Vec_Grow(loaded_ivars->uniq_analyzers, Vec_Get_Size(analyzers));

    HashIterator *iter = HashIter_new(type_dumps);
    while (HashIter_Next(iter)) {
        String *field = HashIter_Get_Key(iter);
        Hash *type_dump = (Hash*)CERTIFY(HashIter_Get_Value(iter), HASH);
        String *type_str = (String*)Hash_Fetch_Utf8(type_dump, "type", 4);

        if (type_str) {
            if (Str_Equals_Utf8(type_str, "fulltext", 8)) {
                // Replace the "analyzer" tick with the real thing.
                Obj *tick
                    = CERTIFY(Hash_Fetch_Utf8(type_dump, "analyzer", 8),
                              OBJ);
                Analyzer *analyzer
                    = (Analyzer*)Vec_Fetch(analyzers,
                                           (uint32_t)Json_obj_to_i64(tick));
                if (!analyzer) {
                    THROW(ERR, "Can't find analyzer for '%o'", field);
                }
                Hash_Store_Utf8(type_dump, "analyzer", 8, INCREF(analyzer));
                FullTextType *type
                    = (FullTextType*)S_load_type(FULLTEXTTYPE,
                                                 (Obj*)type_dump);
                Schema_Spec_Field(loaded, field, (FieldType*)type);
                DECREF(type);
            }
            else if (Str_Equals_Utf8(type_str, "string", 6)) {
                StringType *type
                    = (StringType*)S_load_type(STRINGTYPE, (Obj*)type_dump);
                Schema_Spec_Field(loaded, field, (FieldType*)type);
                DECREF(type);
            }
            else if (Str_Equals_Utf8(type_str, "blob", 4)) {
                BlobType *type
                    = (BlobType*)S_load_type(BLOBTYPE, (Obj*)type_dump);
                Schema_Spec_Field(loaded, field, (FieldType*)type);
                DECREF(type);
            }
            else if (Str_Equals_Utf8(type_str, "i32_t", 5)) {
                Int32Type *type
                    = (Int32Type*)S_load_type(INT32TYPE, (Obj*)type_dump);
                Schema_Spec_Field(loaded, field, (FieldType*)type);
                DECREF(type);
            }
            else if (Str_Equals_Utf8(type_str, "i64_t", 5)) {
                Int64Type *type
                    = (Int64Type*)S_load_type(INT64TYPE, (Obj*)type_dump);
                Schema_Spec_Field(loaded, field, (FieldType*)type);
                DECREF(type);
            }
            else if (Str_Equals_Utf8(type_str, "f32_t", 5)) {
                Float32Type *type
                    = (Float32Type*)S_load_type(FLOAT32TYPE,
                                                (Obj*)type_dump);
                Schema_Spec_Field(loaded, field, (FieldType*)type);
                DECREF(type);
            }
            else if (Str_Equals_Utf8(type_str, "f64_t", 5)) {
                Float64Type *type
                    = (Float64Type*)S_load_type(FLOAT64TYPE,
                                                (Obj*)type_dump);
                Schema_Spec_Field(loaded, field, (FieldType*)type);
                DECREF(type);
            }
            else {
                THROW(ERR, "Unknown type '%o' for field '%o'",
                      type_str, field);
            }
        }
        else {
            FieldType *type
                = (FieldType*)CERTIFY(Freezer_load((Obj*)type_dump),
                                      FIELDTYPE);
            Schema_Spec_Field(loaded, field, type);
            DECREF(type);
        }
    }
    DECREF(iter);
    DECREF(analyzers);

    return loaded;
}
static bool
S_merge_updated_deletions(BackgroundMerger *self) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);
    Hash *updated_deletions = NULL;

    PolyReader *new_polyreader
        = PolyReader_open((Obj*)ivars->folder, NULL, NULL);
    Vector *new_seg_readers = PolyReader_Get_Seg_Readers(new_polyreader);
    Vector *old_seg_readers = PolyReader_Get_Seg_Readers(ivars->polyreader);
    Hash *new_segs = Hash_new(Vec_Get_Size(new_seg_readers));

    for (uint32_t i = 0, max = Vec_Get_Size(new_seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(new_seg_readers, i);
        String *seg_name = SegReader_Get_Seg_Name(seg_reader);
        Hash_Store(new_segs, seg_name, INCREF(seg_reader));
    }

    for (uint32_t i = 0, max = Vec_Get_Size(old_seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(old_seg_readers, i);
        String *seg_name = SegReader_Get_Seg_Name(seg_reader);

        // If this segment was merged away...
        if (Hash_Fetch(ivars->doc_maps, seg_name)) {
            SegReader *new_seg_reader
                = (SegReader*)CERTIFY(
                      Hash_Fetch(new_segs, seg_name),
                      SEGREADER);
            int32_t old_del_count = SegReader_Del_Count(seg_reader);
            int32_t new_del_count = SegReader_Del_Count(new_seg_reader);

            // ... were any new deletions applied against it?
            if (old_del_count != new_del_count) {
                DeletionsReader *del_reader
                    = (DeletionsReader*)SegReader_Obtain(
                          new_seg_reader,
                          Class_Get_Name(DELETIONSREADER));
                if (!updated_deletions) {
                    updated_deletions = Hash_new(max);
                }
                Hash_Store(updated_deletions, seg_name,
                           (Obj*)DelReader_Iterator(del_reader));
            }
        }
    }

    DECREF(new_polyreader);
    DECREF(new_segs);

    if (!updated_deletions) {
        return false;
    }
    else {
        PolyReader *merge_polyreader
            = PolyReader_open((Obj*)ivars->folder, ivars->snapshot, NULL);
        Vector *merge_seg_readers
            = PolyReader_Get_Seg_Readers(merge_polyreader);
        Snapshot *latest_snapshot
            = Snapshot_Read_File(Snapshot_new(), ivars->folder, NULL);
        int64_t new_seg_num
            = IxManager_Highest_Seg_Num(ivars->manager, latest_snapshot)
              + 1;
        Segment *new_segment = Seg_new(new_seg_num);
        SegWriter *seg_writer
            = SegWriter_new(ivars->schema, ivars->snapshot, new_segment,
                            merge_polyreader);
        DeletionsWriter *del_writer = SegWriter_Get_Del_Writer(seg_writer);
        int64_t  merge_seg_num = Seg_Get_Number(ivars->segment);
        uint32_t seg_tick = INT32_MAX;
        int32_t  offset   = INT32_MAX;

        SegWriter_Prep_Seg_Dir(seg_writer);

        // Locate the rewritten segment's tick and doc-id offset within
        // the merge-time PolyReader.
        for (uint32_t i = 0, max = Vec_Get_Size(merge_seg_readers);
             i < max; i++
            ) {
            SegReader *seg_reader
                = (SegReader*)Vec_Fetch(merge_seg_readers, i);
            if (SegReader_Get_Seg_Num(seg_reader) == merge_seg_num) {
                I32Array *offsets = PolyReader_Offsets(merge_polyreader);
                seg_tick = i;
                offset = I32Arr_Get(offsets, seg_tick);
                DECREF(offsets);
            }
        }
        if (offset == INT32_MAX) {
            THROW(ERR, "Failed sanity check");
        }

        HashIterator *iter = HashIter_new(updated_deletions);
        while (HashIter_Next(iter)) {
            String  *seg_name  = HashIter_Get_Key(iter);
            Matcher *deletions = (Matcher*)HashIter_Get_Value(iter);
            I32Array *doc_map
                = (I32Array*)CERTIFY(
                      Hash_Fetch(ivars->doc_maps, seg_name),
                      I32ARRAY);
            int32_t del;
            while (0 != (del = Matcher_Next(deletions))) {
                // Find the slot where the deleted doc resides in the
                // rewritten segment.  If the doc was already deleted when
                // we were merging, do nothing.
                int32_t remapped = I32Arr_Get(doc_map, del);
                if (remapped) {
                    // It's a new deletion, so carry it forward and zap it
                    // in the rewritten segment.
                    DelWriter_Delete_By_Doc_ID(del_writer,
                                               remapped + offset);
                }
            }
        }
        DECREF(iter);

        // Finish the segment and clean up.
        DelWriter_Finish(del_writer);
        SegWriter_Finish(seg_writer);
        DECREF(seg_writer);
        DECREF(new_segment);
        DECREF(latest_snapshot);
        DECREF(merge_polyreader);
        DECREF(updated_deletions);
    }

    return true;
}
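/* A worked example of the remapping above, with hypothetical numbers: say
 * doc 7 of an old segment survived the merge and doc_map maps 7 -> 3, its
 * slot within the rewritten segment.  With the rewritten segment starting
 * at PolyReader offset 120, DelWriter_Delete_By_Doc_ID() receives
 * 3 + 120 = 123, the index-wide doc id.  A doc_map entry of 0 means the
 * doc was already deleted at merge time, so the deletion is skipped. */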