InStream*
CFReader_Local_Open_In_IMP(CompoundFileReader *self, String *name) {
    CompoundFileReaderIVARS *const ivars = CFReader_IVARS(self);
    Hash *entry = (Hash*)Hash_Fetch(ivars->records, name);

    if (!entry) {
        InStream *instream = Folder_Local_Open_In(ivars->real_folder, name);
        if (!instream) {
            ERR_ADD_FRAME(Err_get_error());
        }
        return instream;
    }
    else {
        Obj *len    = Hash_Fetch_Utf8(entry, "length", 6);
        Obj *offset = Hash_Fetch_Utf8(entry, "offset", 6);
        if (!len || !offset) {
            Err_set_error(Err_new(Str_newf("Malformed entry for '%o' in '%o'",
                                           name,
                                           Folder_Get_Path(ivars->real_folder))));
            return NULL;
        }
        else if (Str_Get_Size(ivars->path)) {
            String *fullpath = Str_newf("%o/%o", ivars->path, name);
            InStream *instream = InStream_Reopen(ivars->instream, fullpath,
                                                 Obj_To_I64(offset),
                                                 Obj_To_I64(len));
            DECREF(fullpath);
            return instream;
        }
        else {
            return InStream_Reopen(ivars->instream, name,
                                   Obj_To_I64(offset), Obj_To_I64(len));
        }
    }
}
StringType*
StringType_load(StringType *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    CharBuf *class_name = (CharBuf*)Hash_Fetch_Str(source, "_class", 6);
    VTable *vtable
        = (class_name != NULL && Obj_Is_A((Obj*)class_name, CHARBUF))
          ? VTable_singleton(class_name, NULL)
          : STRINGTYPE;
    StringType *loaded = (StringType*)VTable_Make_Obj(vtable);
    Obj *boost_dump    = Hash_Fetch_Str(source, "boost", 5);
    Obj *indexed_dump  = Hash_Fetch_Str(source, "indexed", 7);
    Obj *stored_dump   = Hash_Fetch_Str(source, "stored", 6);
    Obj *sortable_dump = Hash_Fetch_Str(source, "sortable", 8);
    UNUSED_VAR(self);

    StringType_init(loaded);
    if (boost_dump)    { loaded->boost    = (float)Obj_To_F64(boost_dump); }
    if (indexed_dump)  { loaded->indexed  = (bool_t)Obj_To_I64(indexed_dump); }
    if (stored_dump)   { loaded->stored   = (bool_t)Obj_To_I64(stored_dump); }
    if (sortable_dump) { loaded->sortable = (bool_t)Obj_To_I64(sortable_dump); }

    return loaded;
}
InStream*
CFReader_local_open_in(CompoundFileReader *self, const CharBuf *name) {
    Hash *entry = (Hash*)Hash_Fetch(self->records, (Obj*)name);

    if (!entry) {
        InStream *instream = Folder_Local_Open_In(self->real_folder, name);
        if (!instream) {
            ERR_ADD_FRAME(Err_get_error());
        }
        return instream;
    }
    else {
        Obj *len    = Hash_Fetch_Str(entry, "length", 6);
        Obj *offset = Hash_Fetch_Str(entry, "offset", 6);
        if (!len || !offset) {
            Err_set_error(Err_new(CB_newf("Malformed entry for '%o' in '%o'",
                                          name,
                                          Folder_Get_Path(self->real_folder))));
            return NULL;
        }
        else if (CB_Get_Size(self->path)) {
            CharBuf *fullpath = CB_newf("%o/%o", self->path, name);
            InStream *instream = InStream_Reopen(self->instream, fullpath,
                                                 Obj_To_I64(offset),
                                                 Obj_To_I64(len));
            DECREF(fullpath);
            return instream;
        }
        else {
            return InStream_Reopen(self->instream, name,
                                   Obj_To_I64(offset), Obj_To_I64(len));
        }
    }
}
DefaultHighlightReader*
DefHLReader_init(DefaultHighlightReader *self, Schema *schema,
                 Folder *folder, Snapshot *snapshot, VArray *segments,
                 int32_t seg_tick) {
    Segment *segment;
    Hash    *metadata;
    HLReader_init((HighlightReader*)self, schema, folder, snapshot,
                  segments, seg_tick);
    segment  = DefHLReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "highlight", 9);
    if (!metadata) {
        metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "term_vectors", 12);
    }

    // Check format.
    if (metadata) {
        Obj *format = Hash_Fetch_Str(metadata, "format", 6);
        if (!format) {
            THROW(ERR, "Missing 'format' var");
        }
        else {
            if (Obj_To_I64(format) != HLWriter_current_file_format) {
                THROW(ERR, "Unsupported highlight data format: %i64",
                      Obj_To_I64(format));
            }
        }
    }

    // Open instreams.
    {
        CharBuf *seg_name = Seg_Get_Name(segment);
        CharBuf *ix_file  = CB_newf("%o/highlight.ix", seg_name);
        CharBuf *dat_file = CB_newf("%o/highlight.dat", seg_name);
        if (Folder_Exists(folder, ix_file)) {
            self->ix_in = Folder_Open_In(folder, ix_file);
            if (!self->ix_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
            self->dat_in = Folder_Open_In(folder, dat_file);
            if (!self->dat_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
        }
        DECREF(ix_file);
        DECREF(dat_file);
    }

    return self;
}
NumericType*
NumType_load(NumericType *self, Obj *dump) {
    UNUSED_VAR(self);
    Hash *source = (Hash*)CERTIFY(dump, HASH);

    // Get a VTable.
    CharBuf *class_name = (CharBuf*)Hash_Fetch_Str(source, "_class", 6);
    CharBuf *type_spec  = (CharBuf*)Hash_Fetch_Str(source, "type", 4);
    VTable *vtable = NULL;
    if (class_name != NULL && Obj_Is_A((Obj*)class_name, CHARBUF)) {
        vtable = VTable_singleton(class_name, NULL);
    }
    else if (type_spec != NULL && Obj_Is_A((Obj*)type_spec, CHARBUF)) {
        if (CB_Equals_Str(type_spec, "i32_t", 5)) {
            vtable = INT32TYPE;
        }
        else if (CB_Equals_Str(type_spec, "i64_t", 5)) {
            vtable = INT64TYPE;
        }
        else if (CB_Equals_Str(type_spec, "f32_t", 5)) {
            vtable = FLOAT32TYPE;
        }
        else if (CB_Equals_Str(type_spec, "f64_t", 5)) {
            vtable = FLOAT64TYPE;
        }
        else {
            THROW(ERR, "Unrecognized type string: '%o'", type_spec);
        }
    }
    CERTIFY(vtable, VTABLE);
    NumericType *loaded = (NumericType*)VTable_Make_Obj(vtable);

    // Extract boost.
    Obj *boost_dump = Hash_Fetch_Str(source, "boost", 5);
    float boost = boost_dump ? (float)Obj_To_F64(boost_dump) : 1.0f;

    // Find boolean properties.
    Obj *indexed_dump = Hash_Fetch_Str(source, "indexed", 7);
    Obj *stored_dump  = Hash_Fetch_Str(source, "stored", 6);
    Obj *sort_dump    = Hash_Fetch_Str(source, "sortable", 8);
    bool_t indexed  = indexed_dump ? (bool_t)Obj_To_I64(indexed_dump) : true;
    bool_t stored   = stored_dump  ? (bool_t)Obj_To_I64(stored_dump)  : true;
    bool_t sortable = sort_dump    ? (bool_t)Obj_To_I64(sort_dump)    : false;

    return NumType_init2(loaded, boost, indexed, stored, sortable);
}
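// Hedged illustration (not from the source): the kind of dump hash that
// NumType_load() consumes, shown as the JSON it would be parsed from.
// The keys mirror the fetches above; any omitted key falls back to the
// defaults visible in the function (boost 1.0, indexed/stored true,
// sortable false).
//
//     {
//         "type": "i32_t",
//         "boost": 2.0,
//         "indexed": 1,
//         "stored": 0,
//         "sortable": 1
//     }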
Snapshot*
Snapshot_read_file(Snapshot *self, Folder *folder, const CharBuf *path) {
    // Eliminate all prior data. Pick a snapshot file.
    S_zero_out(self);
    self->path = path ? CB_Clone(path) : IxFileNames_latest_snapshot(folder);

    if (self->path) {
        Hash *snap_data = (Hash*)CERTIFY(
                              Json_slurp_json(folder, self->path), HASH);
        Obj *format_obj = CERTIFY(
                              Hash_Fetch_Str(snap_data, "format", 6), OBJ);
        int32_t format = (int32_t)Obj_To_I64(format_obj);
        Obj *subformat_obj = Hash_Fetch_Str(snap_data, "subformat", 9);
        int32_t subformat = subformat_obj
                            ? (int32_t)Obj_To_I64(subformat_obj)
                            : 0;

        // Verify that we can read the index properly.
        if (format > Snapshot_current_file_format) {
            THROW(ERR, "Snapshot format too recent: %i32, %i32", format,
                  Snapshot_current_file_format);
        }

        // Build up list of entries.
        VArray *list = (VArray*)CERTIFY(
                           Hash_Fetch_Str(snap_data, "entries", 7), VARRAY);
        INCREF(list);
        if (format == 1 || (format == 2 && subformat < 1)) {
            VArray *cleaned = S_clean_segment_contents(list);
            DECREF(list);
            list = cleaned;
        }
        Hash_Clear(self->entries);
        for (uint32_t i = 0, max = VA_Get_Size(list); i < max; i++) {
            CharBuf *entry = (CharBuf*)CERTIFY(
                                 VA_Fetch(list, i), CHARBUF);
            Hash_Store(self->entries, (Obj*)entry, INCREF(&EMPTY));
        }
        DECREF(list);
        DECREF(snap_data);
    }

    return self;
}
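// Hedged sketch (inferred from the fetches above, not a verbatim file):
// the snapshot JSON that Snapshot_read_file() consumes.  "subformat" is
// optional and defaults to 0; "entries" lists the files belonging to the
// snapshot.  The entry names below are invented for illustration.
//
//     {
//         "format": 2,
//         "subformat": 1,
//         "entries": [ "schema_1.json", "seg_1/segmeta.json" ]
//     }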
DefaultSortReader*
DefSortReader_init(DefaultSortReader *self, Schema *schema, Folder *folder,
                   Snapshot *snapshot, VArray *segments, int32_t seg_tick) {
    DataReader_init((DataReader*)self, schema, folder, snapshot, segments,
                    seg_tick);
    DefaultSortReaderIVARS *const ivars = DefSortReader_IVARS(self);
    Segment *segment = DefSortReader_Get_Segment(self);
    Hash *metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "sort", 4);

    // Check format.
    ivars->format = 0;
    if (metadata) {
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);
        if (!format) {
            THROW(ERR, "Missing 'format' var");
        }
        else {
            ivars->format = (int32_t)Obj_To_I64(format);
            if (ivars->format < 2 || ivars->format > 3) {
                THROW(ERR, "Unsupported sort cache format: %i32",
                      ivars->format);
            }
        }
    }

    // Init.
    ivars->caches = Hash_new(0);

    // Either extract or fake up the "counts", "null_ords", and "ord_widths"
    // hashes.
    if (metadata) {
        ivars->counts
            = (Hash*)INCREF(CERTIFY(Hash_Fetch_Utf8(metadata, "counts", 6),
                                    HASH));
        ivars->null_ords = (Hash*)Hash_Fetch_Utf8(metadata, "null_ords", 9);
        if (ivars->null_ords) {
            ivars->null_ords = (Hash*)INCREF(CERTIFY(ivars->null_ords, HASH));
        }
        else {
            ivars->null_ords = Hash_new(0);
        }
        ivars->ord_widths = (Hash*)Hash_Fetch_Utf8(metadata, "ord_widths", 10);
        if (ivars->ord_widths) {
            ivars->ord_widths = (Hash*)INCREF(CERTIFY(ivars->ord_widths, HASH));
        }
        else {
            ivars->ord_widths = Hash_new(0);
        }
    }
    else {
        ivars->counts     = Hash_new(0);
        ivars->null_ords  = Hash_new(0);
        ivars->ord_widths = Hash_new(0);
    }

    return self;
}
int32_t
IntNum_compare_to(IntNum *self, Obj *other) {
    // If the other object isn't an IntNum, delegate to its Compare_To with
    // the arguments swapped, negating the result to preserve the ordering.
    if (!Obj_Is_A(other, INTNUM)) {
        return -Obj_Compare_To(other, (Obj*)self);
    }
    int64_t self_value  = IntNum_To_I64(self);
    int64_t other_value = Obj_To_I64(other);
    if (self_value < other_value)      { return -1; }
    else if (self_value > other_value) { return 1; }
    return 0;
}
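// A minimal sketch (hypothetical driver, not part of the source) of why the
// swapped, negated delegation above matters: it keeps mixed-type comparisons
// antisymmetric, so Compare_To(a, b) and Compare_To(b, a) always have
// opposite signs.
static void
S_compare_demo(void) {
    Obj *int_num   = (Obj*)Int64_new(5);
    Obj *float_num = (Obj*)Float64_new(5.5);
    // float_num is not an IntNum, so IntNum_compare_to() calls Float64's
    // Compare_To with the arguments swapped and negates the result:
    // 5 < 5.5, so the return value is negative.
    int32_t result = Obj_Compare_To(int_num, float_num);
    UNUSED_VAR(result);
    DECREF(float_num);
    DECREF(int_num);
}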
Snapshot*
Snapshot_read_file(Snapshot *self, Folder *folder, const CharBuf *filename) {
    /* Eliminate all prior data. Pick a snapshot file. */
    S_zero_out(self);
    self->filename = filename
                     ? CB_Clone(filename)
                     : IxFileNames_latest_snapshot(folder);

    if (self->filename) {
        Hash *snap_data = (Hash*)ASSERT_IS_A(
            Json_slurp_json(folder, self->filename), HASH);
        Obj *format = ASSERT_IS_A(
            Hash_Fetch_Str(snap_data, "format", 6), OBJ);

        /* Verify that we can read the index properly. */
        if (Obj_To_I64(format) > Snapshot_current_file_format) {
            THROW("Snapshot format too recent: %i64, %i32",
                  Obj_To_I64(format), Snapshot_current_file_format);
        }

        /* Build up list of entries. */
        {
            u32_t i, max;
            VArray *list = (VArray*)ASSERT_IS_A(
                Hash_Fetch_Str(snap_data, "entries", 7), VARRAY);
            Hash_Clear(self->entries);
            for (i = 0, max = VA_Get_Size(list); i < max; i++) {
                CharBuf *entry = (CharBuf*)ASSERT_IS_A(
                    VA_Fetch(list, i), CHARBUF);
                Hash_Store(self->entries, entry, INCREF(&EMPTY));
            }
        }

        DECREF(snap_data);
    }

    return self;
}
DefaultDocReader*
DefDocReader_init(DefaultDocReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, VArray *segments, i32_t seg_tick) {
    Hash *metadata;
    Segment *segment;
    DocReader_init((DocReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    segment  = DefDocReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "documents", 9);

    if (metadata) {
        CharBuf *seg_name = Seg_Get_Name(segment);
        CharBuf *ix_file  = CB_newf("%o/documents.ix", seg_name);
        CharBuf *dat_file = CB_newf("%o/documents.dat", seg_name);
        Obj *format = Hash_Fetch_Str(metadata, "format", 6);

        /* Check format. */
        if (!format) {
            THROW("Missing 'format' var");
        }
        else {
            i64_t format_val = Obj_To_I64(format);
            if (format_val < DocWriter_current_file_format) {
                THROW("Obsolete doc storage format %i64; "
                      "Index regeneration is required", format_val);
            }
            else if (format_val != DocWriter_current_file_format) {
                THROW("Unsupported doc storage format: %i64", format_val);
            }
        }

        /* Get streams. */
        if (Folder_Exists(folder, ix_file)) {
            self->ix_in  = Folder_Open_In(folder, ix_file);
            self->dat_in = Folder_Open_In(folder, dat_file);
            if (!self->ix_in || !self->dat_in) {
                CharBuf *mess = MAKE_MESS("Can't open either %o or %o",
                                          ix_file, dat_file);
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                Err_throw_mess(mess);
            }
        }
        DECREF(ix_file);
        DECREF(dat_file);
    }

    return self;
}
Obj*
ProximityQuery_Load_IMP(ProximityQuery *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    ProximityQuery_Load_t super_load
        = SUPER_METHOD_PTR(PROXIMITYQUERY, LUCY_ProximityQuery_Load);
    ProximityQuery *loaded = (ProximityQuery*)super_load(self, dump);
    ProximityQueryIVARS *loaded_ivars = ProximityQuery_IVARS(loaded);
    Obj *field = CERTIFY(Hash_Fetch_Utf8(source, "field", 5), OBJ);
    loaded_ivars->field = (String*)CERTIFY(Freezer_load(field), STRING);
    Obj *terms = CERTIFY(Hash_Fetch_Utf8(source, "terms", 5), OBJ);
    loaded_ivars->terms = (VArray*)CERTIFY(Freezer_load(terms), VARRAY);
    Obj *within = CERTIFY(Hash_Fetch_Utf8(source, "within", 6), OBJ);
    loaded_ivars->within = (uint32_t)Obj_To_I64(within);
    return (Obj*)loaded;
}
void
DefDelWriter_Merge_Segment_IMP(DefaultDeletionsWriter *self,
                               SegReader *reader, I32Array *doc_map) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    UNUSED_VAR(doc_map);
    Segment *segment = SegReader_Get_Segment(reader);
    Hash *del_meta = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "deletions", 9);

    if (del_meta) {
        VArray *seg_readers = ivars->seg_readers;
        Hash *files = (Hash*)Hash_Fetch_Utf8(del_meta, "files", 5);
        if (files) {
            String *seg;
            Hash   *mini_meta;
            Hash_Iterate(files);
            while (Hash_Next(files, (Obj**)&seg, (Obj**)&mini_meta)) {

                /* Find the segment the deletions from the SegReader
                 * we're adding correspond to.  If it's gone, we don't
                 * need to worry about losing deletions files that point
                 * at it. */
                for (uint32_t i = 0, max = VA_Get_Size(seg_readers);
                     i < max; i++
                    ) {
                    SegReader *candidate
                        = (SegReader*)VA_Fetch(seg_readers, i);
                    String *candidate_name
                        = Seg_Get_Name(SegReader_Get_Segment(candidate));

                    if (Str_Equals(seg, (Obj*)candidate_name)) {
                        /* If the count hasn't changed, we're about to
                         * merge away the most recent deletions file
                         * pointing at this target segment -- so force a
                         * new file to be written out. */
                        int32_t count = (int32_t)Obj_To_I64(
                            Hash_Fetch_Utf8(mini_meta, "count", 5));
                        DeletionsReader *del_reader
                            = (DeletionsReader*)SegReader_Obtain(
                                  candidate,
                                  Class_Get_Name(DELETIONSREADER));
                        if (count == DelReader_Del_Count(del_reader)) {
                            ivars->updated[i] = true;
                        }
                        break;
                    }
                }
            }
        }
    }
}
bool_t
Seg_read_file(Segment *self, Folder *folder) {
    CharBuf *filename = CB_newf("%o/segmeta.json", self->name);
    Hash *metadata = (Hash*)Json_slurp_json(folder, filename);
    Hash *my_metadata;

    // Bail unless the segmeta file was read successfully.
    DECREF(filename);
    if (!metadata) { return false; }
    CERTIFY(metadata, HASH);

    // Grab metadata for the Segment object itself.
    DECREF(self->metadata);
    self->metadata = metadata;
    my_metadata = (Hash*)CERTIFY(
                      Hash_Fetch_Str(self->metadata, "segmeta", 7), HASH);

    // Assign.
    Obj *count = Hash_Fetch_Str(my_metadata, "count", 5);
    if (!count) { count = Hash_Fetch_Str(my_metadata, "doc_count", 9); }
    if (!count) { THROW(ERR, "Missing 'count'"); }
    else        { self->count = Obj_To_I64(count); }

    // Get list of field nums.
    VArray *source_by_num
        = (VArray*)Hash_Fetch_Str(my_metadata, "field_names", 11);
    uint32_t num_fields = source_by_num ? VA_Get_Size(source_by_num) : 0;
    if (source_by_num == NULL) {
        THROW(ERR, "Failed to extract 'field_names' from metadata");
    }

    // Init.
    DECREF(self->by_num);
    DECREF(self->by_name);
    self->by_num  = VA_new(num_fields);
    self->by_name = Hash_new(num_fields);

    // Copy the list of fields from the source.
    for (uint32_t i = 0; i < num_fields; i++) {
        CharBuf *name = (CharBuf*)VA_Fetch(source_by_num, i);
        Seg_Add_Field(self, name);
    }

    return true;
}
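// Hedged sketch (inferred from the fetches above, not a verbatim file):
// the "segmeta.json" layout Seg_read_file() expects.  The top-level hash
// becomes self->metadata; the nested "segmeta" hash carries the Segment's
// own data, with "doc_count" accepted as a legacy alias for "count".  The
// field names below are invented for illustration.
//
//     {
//         "segmeta": {
//             "count": 42,
//             "field_names": [ "title", "content" ]
//         }
//     }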
static void
S_zap_dead_merge(FilePurger *self, Hash *candidates) {
    IndexManager *manager = self->manager;
    Lock *merge_lock = IxManager_Make_Merge_Lock(manager);

    Lock_Clear_Stale(merge_lock);
    if (!Lock_Is_Locked(merge_lock)) {
        Hash *merge_data = IxManager_Read_Merge_Data(manager);
        Obj  *cutoff = merge_data
                       ? Hash_Fetch_Str(merge_data, "cutoff", 6)
                       : NULL;

        if (cutoff) {
            CharBuf *cutoff_seg = Seg_num_to_name(Obj_To_I64(cutoff));
            if (Folder_Exists(self->folder, cutoff_seg)) {
                ZombieCharBuf *merge_json = ZCB_WRAP_STR("merge.json", 10);
                DirHandle *dh = Folder_Open_Dir(self->folder, cutoff_seg);
                CharBuf *entry = dh ? DH_Get_Entry(dh) : NULL;
                CharBuf *filepath = CB_new(32);

                if (!dh) {
                    THROW(ERR, "Can't open segment dir '%o'", cutoff_seg);
                }

                Hash_Store(candidates, (Obj*)cutoff_seg, INCREF(&EMPTY));
                Hash_Store(candidates, (Obj*)merge_json, INCREF(&EMPTY));

                while (DH_Next(dh)) {
                    // TODO: recursively delete subdirs within seg dir.
                    CB_setf(filepath, "%o/%o", cutoff_seg, entry);
                    Hash_Store(candidates, (Obj*)filepath, INCREF(&EMPTY));
                }

                DECREF(filepath);
                DECREF(dh);
            }
            DECREF(cutoff_seg);
        }

        DECREF(merge_data);
    }

    DECREF(merge_lock);
    return;
}
static void
test_offsets(TestBatchRunner *runner) {
    Folder *folder = S_folder_with_contents();
    CompoundFileWriter *cf_writer = CFWriter_new(folder);
    Hash *cf_metadata;
    Hash *files;

    CFWriter_Consolidate(cf_writer);

    cf_metadata = (Hash*)CERTIFY(
                      Json_slurp_json(folder, cfmeta_file), HASH);
    files = (Hash*)CERTIFY(
                Hash_Fetch_Utf8(cf_metadata, "files", 5), HASH);

    bool offsets_ok = true;

    TEST_TRUE(runner, Hash_Get_Size(files) > 0, "Multiple files");

    HashIterator *iter = HashIter_new(files);
    while (HashIter_Next(iter)) {
        String *file   = HashIter_Get_Key(iter);
        Hash   *stats  = (Hash*)CERTIFY(HashIter_Get_Value(iter), HASH);
        Obj    *offset = CERTIFY(Hash_Fetch_Utf8(stats, "offset", 6), OBJ);
        int64_t offs   = Obj_To_I64(offset);
        if (offs % 8 != 0) {
            offsets_ok = false;
            FAIL(runner, "Offset %" PRId64 " for %s not a multiple of 8",
                 offs, Str_Get_Ptr8(file));
            break;
        }
    }
    DECREF(iter);

    if (offsets_ok) {
        PASS(runner, "All offsets are multiples of 8");
    }

    DECREF(cf_metadata);
    DECREF(cf_writer);
    DECREF(folder);
}
static SortCache*
S_lazy_init_sort_cache(DefaultSortReader *self, String *field) {
    DefaultSortReaderIVARS *const ivars = DefSortReader_IVARS(self);

    // See if we have any values.
    Obj *count_obj = Hash_Fetch(ivars->counts, (Obj*)field);
    int32_t count = count_obj ? (int32_t)Obj_To_I64(count_obj) : 0;
    if (!count) { return NULL; }

    // Get a FieldType and sanity check that the field is sortable.
    Schema *schema = DefSortReader_Get_Schema(self);
    FieldType *type = Schema_Fetch_Type(schema, field);
    if (!type || !FType_Sortable(type)) {
        THROW(ERR, "'%o' isn't a sortable field", field);
    }

    // Open streams.
    Folder  *folder    = DefSortReader_Get_Folder(self);
    Segment *segment   = DefSortReader_Get_Segment(self);
    String  *seg_name  = Seg_Get_Name(segment);
    int32_t  field_num = Seg_Field_Num(segment, field);
    int8_t   prim_id   = FType_Primitive_ID(type);
    bool     var_width = (prim_id == FType_TEXT || prim_id == FType_BLOB)
                         ? true
                         : false;
    String *ord_path = Str_newf("%o/sort-%i32.ord", seg_name, field_num);
    InStream *ord_in = Folder_Open_In(folder, ord_path);
    DECREF(ord_path);
    if (!ord_in) {
        THROW(ERR, "Error building sort cache for '%o': %o",
              field, Err_get_error());
    }
    InStream *ix_in = NULL;
    if (var_width) {
        String *ix_path = Str_newf("%o/sort-%i32.ix", seg_name, field_num);
        ix_in = Folder_Open_In(folder, ix_path);
        DECREF(ix_path);
        if (!ix_in) {
            THROW(ERR, "Error building sort cache for '%o': %o",
                  field, Err_get_error());
        }
    }
    String *dat_path = Str_newf("%o/sort-%i32.dat", seg_name, field_num);
    InStream *dat_in = Folder_Open_In(folder, dat_path);
    DECREF(dat_path);
    if (!dat_in) {
        THROW(ERR, "Error building sort cache for '%o': %o",
              field, Err_get_error());
    }

    Obj     *null_ord_obj = Hash_Fetch(ivars->null_ords, (Obj*)field);
    int32_t  null_ord = null_ord_obj ? (int32_t)Obj_To_I64(null_ord_obj) : -1;
    Obj     *ord_width_obj = Hash_Fetch(ivars->ord_widths, (Obj*)field);
    int32_t  ord_width = ord_width_obj
                         ? (int32_t)Obj_To_I64(ord_width_obj)
                         : S_calc_ord_width(count);
    int32_t  doc_max = (int32_t)Seg_Get_Count(segment);

    SortCache *cache = NULL;
    switch (prim_id & FType_PRIMITIVE_ID_MASK) {
        case FType_TEXT:
            cache = (SortCache*)TextSortCache_new(field, type, count, doc_max,
                                                  null_ord, ord_width, ord_in,
                                                  ix_in, dat_in);
            break;
        case FType_INT32:
            cache = (SortCache*)I32SortCache_new(field, type, count, doc_max,
                                                 null_ord, ord_width, ord_in,
                                                 dat_in);
            break;
        case FType_INT64:
            cache = (SortCache*)I64SortCache_new(field, type, count, doc_max,
                                                 null_ord, ord_width, ord_in,
                                                 dat_in);
            break;
        case FType_FLOAT32:
            cache = (SortCache*)F32SortCache_new(field, type, count, doc_max,
                                                 null_ord, ord_width, ord_in,
                                                 dat_in);
            break;
        case FType_FLOAT64:
            cache = (SortCache*)F64SortCache_new(field, type, count, doc_max,
                                                 null_ord, ord_width, ord_in,
                                                 dat_in);
            break;
        default:
            THROW(ERR, "No SortCache class for %o", type);
    }
    Hash_Store(ivars->caches, (Obj*)field, (Obj*)cache);

    if (ivars->format == 2) { // bug compatibility
        SortCache_Set_Native_Ords(cache, true);
    }

    DECREF(ord_in);
    DECREF(ix_in);
    DECREF(dat_in);

    return cache;
}
bool_t
Obj_to_bool(Obj *self) {
    return !!Obj_To_I64(self);
}
static bool_t
S_maybe_merge(Indexer *self, VArray *seg_readers) {
    bool_t    merge_happened  = false;
    uint32_t  num_seg_readers = VA_Get_Size(seg_readers);
    Lock     *merge_lock      = IxManager_Make_Merge_Lock(self->manager);
    bool_t    got_merge_lock  = Lock_Obtain(merge_lock);
    int64_t   cutoff;
    VArray   *to_merge;
    uint32_t  i, max;

    if (got_merge_lock) {
        self->merge_lock = merge_lock;
        cutoff = 0;
    }
    else {
        // If something else holds the merge lock, don't interfere.
        Hash *merge_data = IxManager_Read_Merge_Data(self->manager);
        if (merge_data) {
            Obj *cutoff_obj = Hash_Fetch_Str(merge_data, "cutoff", 6);
            if (cutoff_obj) {
                cutoff = Obj_To_I64(cutoff_obj);
            }
            else {
                cutoff = I64_MAX;
            }
            DECREF(merge_data);
        }
        else {
            cutoff = I64_MAX;
        }
        DECREF(merge_lock);
    }

    // Get a list of segments to recycle.  Validate and confirm that there
    // are no dupes in the list.
    to_merge = IxManager_Recycle(self->manager, self->polyreader,
                                 self->del_writer, cutoff, self->optimize);
    {
        Hash *seen = Hash_new(VA_Get_Size(to_merge));
        for (i = 0, max = VA_Get_Size(to_merge); i < max; i++) {
            SegReader *seg_reader = (SegReader*)CERTIFY(
                                        VA_Fetch(to_merge, i), SEGREADER);
            CharBuf *seg_name = SegReader_Get_Seg_Name(seg_reader);
            if (Hash_Fetch(seen, (Obj*)seg_name)) {
                DECREF(seen);
                DECREF(to_merge);
                THROW(ERR, "Recycle() tried to merge segment '%o' twice",
                      seg_name);
            }
            Hash_Store(seen, (Obj*)seg_name, INCREF(&EMPTY));
        }
        DECREF(seen);
    }

    // Consolidate the segments if they are either sparse or if optimization
    // was forced.
    for (i = 0, max = VA_Get_Size(to_merge); i < max; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(to_merge, i);
        int64_t seg_num = SegReader_Get_Seg_Num(seg_reader);
        Matcher *deletions
            = DelWriter_Seg_Deletions(self->del_writer, seg_reader);
        I32Array *doc_map = DelWriter_Generate_Doc_Map(
                                self->del_writer, deletions,
                                SegReader_Doc_Max(seg_reader),
                                (int32_t)Seg_Get_Count(self->segment));
        if (seg_num <= cutoff) {
            THROW(ERR, "Segment %o violates cutoff (%i64 <= %i64)",
                  SegReader_Get_Seg_Name(seg_reader), seg_num, cutoff);
        }
        SegWriter_Merge_Segment(self->seg_writer, seg_reader, doc_map);
        merge_happened = true;
        DECREF(deletions);
        DECREF(doc_map);
    }

    // Write out new deletions.
    if (DelWriter_Updated(self->del_writer)) {
        // Only write out if they haven't all been applied.
        if (VA_Get_Size(to_merge) != num_seg_readers) {
            DelWriter_Finish(self->del_writer);
        }
    }

    DECREF(to_merge);
    return merge_happened;
}
Indexer*
Indexer_init(Indexer *self, Schema *schema, Obj *index,
             IndexManager *manager, int32_t flags) {
    bool_t    create   = (flags & Indexer_CREATE)   ? true : false;
    bool_t    truncate = (flags & Indexer_TRUNCATE) ? true : false;
    Folder   *folder   = S_init_folder(index, create);
    Lock     *write_lock;
    CharBuf  *latest_snapfile;
    Snapshot *latest_snapshot = Snapshot_new();

    // Init.
    self->stock_doc    = Doc_new(NULL, 0);
    self->truncate     = false;
    self->optimize     = false;
    self->prepared     = false;
    self->needs_commit = false;
    self->snapfile     = NULL;
    self->merge_lock   = NULL;

    // Assign.
    self->folder  = folder;
    self->manager = manager
                    ? (IndexManager*)INCREF(manager)
                    : IxManager_new(NULL, NULL);
    IxManager_Set_Folder(self->manager, folder);

    // Get a write lock for this folder.
    write_lock = IxManager_Make_Write_Lock(self->manager);
    Lock_Clear_Stale(write_lock);
    if (Lock_Obtain(write_lock)) {
        // Only assign if successful, otherwise DESTROY unlocks -- bad!
        self->write_lock = write_lock;
    }
    else {
        DECREF(write_lock);
        DECREF(self);
        RETHROW(INCREF(Err_get_error()));
    }

    // Find the latest snapshot or create a new one.
    latest_snapfile = IxFileNames_latest_snapshot(folder);
    if (latest_snapfile) {
        Snapshot_Read_File(latest_snapshot, folder, latest_snapfile);
    }

    // Look for an existing Schema if one wasn't supplied.
    if (schema) {
        self->schema = (Schema*)INCREF(schema);
    }
    else {
        if (!latest_snapfile) {
            THROW(ERR, "No Schema supplied, and can't find one in the index");
        }
        else {
            CharBuf *schema_file = S_find_schema_file(latest_snapshot);
            Hash *dump = (Hash*)Json_slurp_json(folder, schema_file);
            if (dump) { // read file successfully
                self->schema = (Schema*)CERTIFY(
                                   VTable_Load_Obj(SCHEMA, (Obj*)dump),
                                   SCHEMA);
                schema = self->schema;
                DECREF(dump);
                schema_file = NULL;
            }
            else {
                THROW(ERR, "Failed to parse %o", schema_file);
            }
        }
    }

    // If we're clobbering, start with an empty Snapshot and an empty
    // PolyReader.  Otherwise, start with the most recent Snapshot and an
    // up-to-date PolyReader.
    if (truncate) {
        self->snapshot = Snapshot_new();
        self->polyreader = PolyReader_new(schema, folder, NULL, NULL, NULL);
        self->truncate = true;
    }
    else {
        // TODO: clone most recent snapshot rather than read it twice.
        self->snapshot = (Snapshot*)INCREF(latest_snapshot);
        self->polyreader = latest_snapfile
                           ? PolyReader_open((Obj*)folder, NULL, NULL)
                           : PolyReader_new(schema, folder, NULL, NULL, NULL);
        if (latest_snapfile) {
            // Make sure that any existing fields which may have been
            // dynamically added during past indexing sessions get added.
            Schema *old_schema = PolyReader_Get_Schema(self->polyreader);
            Schema_Eat(schema, old_schema);
        }
    }

    // Zap detritus from previous sessions.
    {
        // Note: we have to feed FilePurger with the most recent snapshot
        // file now, but with the Indexer's snapshot later.
        FilePurger *file_purger
            = FilePurger_new(folder, latest_snapshot, self->manager);
        FilePurger_Purge(file_purger);
        DECREF(file_purger);
    }

    // Create a new segment.
    {
        int64_t new_seg_num
            = IxManager_Highest_Seg_Num(self->manager, latest_snapshot) + 1;
        Lock *merge_lock = IxManager_Make_Merge_Lock(self->manager);
        uint32_t i, max;

        if (Lock_Is_Locked(merge_lock)) {
            // If there's a background merge process going on, stay out of
            // its way.
            Hash *merge_data = IxManager_Read_Merge_Data(self->manager);
            Obj *cutoff_obj = merge_data
                              ? Hash_Fetch_Str(merge_data, "cutoff", 6)
                              : NULL;
            if (!cutoff_obj) {
                DECREF(merge_lock);
                DECREF(merge_data);
                THROW(ERR, "Background merge detected, but can't read merge data");
            }
            else {
                int64_t cutoff = Obj_To_I64(cutoff_obj);
                if (cutoff >= new_seg_num) {
                    new_seg_num = cutoff + 1;
                }
            }
            DECREF(merge_data);
        }

        self->segment = Seg_new(new_seg_num);

        // Add all known fields to Segment.
        {
            VArray *fields = Schema_All_Fields(schema);
            for (i = 0, max = VA_Get_Size(fields); i < max; i++) {
                Seg_Add_Field(self->segment, (CharBuf*)VA_Fetch(fields, i));
            }
            DECREF(fields);
        }

        DECREF(merge_lock);
    }

    // Create new SegWriter and FilePurger.
    self->file_purger = FilePurger_new(folder, self->snapshot, self->manager);
    self->seg_writer = SegWriter_new(self->schema, self->snapshot,
                                     self->segment, self->polyreader);
    SegWriter_Prep_Seg_Dir(self->seg_writer);

    // Grab a local ref to the DeletionsWriter.
    self->del_writer = (DeletionsWriter*)INCREF(
                           SegWriter_Get_Del_Writer(self->seg_writer));

    DECREF(latest_snapfile);
    DECREF(latest_snapshot);

    return self;
}
CompoundFileReader*
CFReader_do_open(CompoundFileReader *self, Folder *folder) {
    CharBuf *cfmeta_file = (CharBuf*)ZCB_WRAP_STR("cfmeta.json", 11);
    Hash *metadata = (Hash*)Json_slurp_json((Folder*)folder, cfmeta_file);
    Err *error = NULL;

    Folder_init((Folder*)self, Folder_Get_Path(folder));

    // Parse metadata file.
    if (!metadata || !Hash_Is_A(metadata, HASH)) {
        error = Err_new(CB_newf("Can't read '%o' in '%o'", cfmeta_file,
                                Folder_Get_Path(folder)));
    }
    else {
        Obj *format = Hash_Fetch_Str(metadata, "format", 6);
        self->format = format ? (int32_t)Obj_To_I64(format) : 0;
        self->records = (Hash*)INCREF(Hash_Fetch_Str(metadata, "files", 5));
        if (self->format < 1) {
            error = Err_new(CB_newf("Corrupt %o file: Missing or invalid "
                                    "'format'", cfmeta_file));
        }
        else if (self->format > CFWriter_current_file_format) {
            error = Err_new(CB_newf("Unsupported compound file format: %i32 "
                                    "(current = %i32)", self->format,
                                    CFWriter_current_file_format));
        }
        else if (!self->records) {
            error = Err_new(CB_newf("Corrupt %o file: missing 'files' key",
                                    cfmeta_file));
        }
    }
    DECREF(metadata);
    if (error) {
        Err_set_error(error);
        DECREF(self);
        return NULL;
    }

    // Open an instream which we'll clone over and over.
    CharBuf *cf_file = (CharBuf*)ZCB_WRAP_STR("cf.dat", 6);
    self->instream = Folder_Open_In(folder, cf_file);
    if (!self->instream) {
        ERR_ADD_FRAME(Err_get_error());
        DECREF(self);
        return NULL;
    }

    // Assign.
    self->real_folder = (Folder*)INCREF(folder);

    // Strip directory name from filepaths for old format.
    if (self->format == 1) {
        VArray *files = Hash_Keys(self->records);
        ZombieCharBuf *filename = ZCB_BLANK();
        ZombieCharBuf *folder_name
            = IxFileNames_local_part(Folder_Get_Path(folder), ZCB_BLANK());
        size_t folder_name_len = ZCB_Length(folder_name);

        for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
            CharBuf *orig = (CharBuf*)VA_Fetch(files, i);
            if (CB_Starts_With(orig, (CharBuf*)folder_name)) {
                Obj *record = Hash_Delete(self->records, (Obj*)orig);
                ZCB_Assign(filename, orig);
                ZCB_Nip(filename, folder_name_len + sizeof(DIR_SEP) - 1);
                Hash_Store(self->records, (Obj*)filename, (Obj*)record);
            }
        }

        DECREF(files);
    }

    return self;
}
static void
S_attempt_To_I64(void *context) {
    Obj_To_I64((Obj*)context);
}
CompoundFileReader*
CFReader_do_open(CompoundFileReader *self, Folder *folder) {
    CompoundFileReaderIVARS *const ivars = CFReader_IVARS(self);
    String *cfmeta_file = (String*)SSTR_WRAP_UTF8("cfmeta.json", 11);
    Hash *metadata = (Hash*)Json_slurp_json((Folder*)folder, cfmeta_file);
    Err *error = NULL;

    Folder_init((Folder*)self, Folder_Get_Path(folder));

    // Parse metadata file.
    if (!metadata || !Hash_Is_A(metadata, HASH)) {
        error = Err_new(Str_newf("Can't read '%o' in '%o'", cfmeta_file,
                                 Folder_Get_Path(folder)));
    }
    else {
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);
        ivars->format = format ? (int32_t)Obj_To_I64(format) : 0;
        ivars->records = (Hash*)INCREF(Hash_Fetch_Utf8(metadata, "files", 5));
        if (ivars->format < 1) {
            error = Err_new(Str_newf("Corrupt %o file: Missing or invalid "
                                     "'format'", cfmeta_file));
        }
        else if (ivars->format > CFWriter_current_file_format) {
            error = Err_new(Str_newf("Unsupported compound file format: %i32 "
                                     "(current = %i32)", ivars->format,
                                     CFWriter_current_file_format));
        }
        else if (!ivars->records) {
            error = Err_new(Str_newf("Corrupt %o file: missing 'files' key",
                                     cfmeta_file));
        }
    }
    DECREF(metadata);
    if (error) {
        Err_set_error(error);
        DECREF(self);
        return NULL;
    }

    // Open an instream which we'll clone over and over.
    String *cf_file = (String*)SSTR_WRAP_UTF8("cf.dat", 6);
    ivars->instream = Folder_Open_In(folder, cf_file);
    if (!ivars->instream) {
        ERR_ADD_FRAME(Err_get_error());
        DECREF(self);
        return NULL;
    }

    // Assign.
    ivars->real_folder = (Folder*)INCREF(folder);

    // Strip directory name from filepaths for old format.
    if (ivars->format == 1) {
        Vector *files = Hash_Keys(ivars->records);
        String *folder_name = IxFileNames_local_part(Folder_Get_Path(folder));
        size_t folder_name_len = Str_Length(folder_name);

        for (uint32_t i = 0, max = Vec_Get_Size(files); i < max; i++) {
            String *orig = (String*)Vec_Fetch(files, i);
            if (Str_Starts_With(orig, folder_name)) {
                Obj *record = Hash_Delete(ivars->records, orig);
                size_t offset = folder_name_len + sizeof(CHY_DIR_SEP) - 1;
                size_t len    = Str_Length(orig) - offset;
                String *filename = Str_SubString(orig, offset, len);
                Hash_Store(ivars->records, filename, (Obj*)record);
                DECREF(filename);
            }
        }

        DECREF(folder_name);
        DECREF(files);
    }

    return self;
}
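// Hedged sketch (inferred from the readers above, not a verbatim file):
// the "cfmeta.json" metadata that CFReader_do_open() parses.  "format" is
// checked against CFWriter_current_file_format, and each entry under
// "files" supplies the "offset"/"length" pair that Local_Open_In uses to
// reopen a slice of "cf.dat".  The filenames and numbers below are
// invented for illustration.
//
//     {
//         "format": 2,
//         "files": {
//             "documents.dat": { "offset": 0,   "length": 512 },
//             "documents.ix":  { "offset": 512, "length": 64 }
//         }
//     }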
bool
Obj_To_Bool_IMP(Obj *self) {
    return !!Obj_To_I64(self);
}
void
Inverter_invert_doc(Inverter *self, Doc *doc) {
    InverterIVARS *const ivars = Inverter_IVARS(self);
    Hash *const fields = (Hash*)Doc_Get_Fields(doc);
    uint32_t num_keys = Hash_Iterate(fields);

    // Prepare for the new doc.
    Inverter_Set_Doc(self, doc);

    // Extract and invert the doc's fields.
    while (num_keys--) {
        Obj *key, *obj;
        Hash_Next(fields, &key, &obj);
        CharBuf *field = (CharBuf*)CERTIFY(key, CHARBUF);
        InverterEntry *inventry = S_fetch_entry(ivars, field);
        InverterEntryIVARS *inventry_ivars = InvEntry_IVARS(inventry);
        FieldType *type = inventry_ivars->type;

        // Get the field value.
        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                    CharBuf *char_buf = (CharBuf*)CERTIFY(obj, CHARBUF);
                    ViewCharBuf *value = (ViewCharBuf*)inventry_ivars->value;
                    ViewCB_Assign(value, char_buf);
                    break;
                }
            case FType_BLOB: {
                    ByteBuf *byte_buf = (ByteBuf*)CERTIFY(obj, BYTEBUF);
                    ViewByteBuf *value = (ViewByteBuf*)inventry_ivars->value;
                    ViewBB_Assign(value, byte_buf);
                    break;
                }
            case FType_INT32: {
                    int32_t int_val = (int32_t)Obj_To_I64(obj);
                    Integer32 *value = (Integer32*)inventry_ivars->value;
                    Int32_Set_Value(value, int_val);
                    break;
                }
            case FType_INT64: {
                    int64_t int_val = Obj_To_I64(obj);
                    Integer64 *value = (Integer64*)inventry_ivars->value;
                    Int64_Set_Value(value, int_val);
                    break;
                }
            case FType_FLOAT32: {
                    float float_val = (float)Obj_To_F64(obj);
                    Float32 *value = (Float32*)inventry_ivars->value;
                    Float32_Set_Value(value, float_val);
                    break;
                }
            case FType_FLOAT64: {
                    double float_val = Obj_To_F64(obj);
                    Float64 *value = (Float64*)inventry_ivars->value;
                    Float64_Set_Value(value, float_val);
                    break;
                }
            default:
                THROW(ERR, "Unrecognized type: %o", type);
        }

        Inverter_Add_Field(self, inventry);
    }
}
static bool_t
S_to_json(Obj *dump, CharBuf *json, int32_t depth) {
    // Guard against infinite recursion in self-referencing data structures.
    if (depth > MAX_DEPTH) {
        CharBuf *mess = MAKE_MESS("Exceeded max depth of %i32", MAX_DEPTH);
        Err_set_error(Err_new(mess));
        return false;
    }

    if (!dump) {
        CB_Cat_Trusted_Str(json, "null", 4);
    }
    else if (dump == (Obj*)CFISH_TRUE) {
        CB_Cat_Trusted_Str(json, "true", 4);
    }
    else if (dump == (Obj*)CFISH_FALSE) {
        CB_Cat_Trusted_Str(json, "false", 5);
    }
    else if (Obj_Is_A(dump, CHARBUF)) {
        S_append_json_string(dump, json);
    }
    else if (Obj_Is_A(dump, INTNUM)) {
        CB_catf(json, "%i64", Obj_To_I64(dump));
    }
    else if (Obj_Is_A(dump, FLOATNUM)) {
        CB_catf(json, "%f64", Obj_To_F64(dump));
    }
    else if (Obj_Is_A(dump, VARRAY)) {
        VArray *array = (VArray*)dump;
        size_t size = VA_Get_Size(array);
        if (size == 0) {
            // Put empty array on single line.
            CB_Cat_Trusted_Str(json, "[]", 2);
            return true;
        }
        else if (size == 1) {
            Obj *elem = VA_Fetch(array, 0);
            if (!(Obj_Is_A(elem, HASH) || Obj_Is_A(elem, VARRAY))) {
                // Put array containing single scalar element on one line.
                CB_Cat_Trusted_Str(json, "[", 1);
                if (!S_to_json(elem, json, depth + 1)) {
                    return false;
                }
                CB_Cat_Trusted_Str(json, "]", 1);
                return true;
            }
        }

        // Fall back to spreading elements across multiple lines.
        CB_Cat_Trusted_Str(json, "[", 1);
        for (size_t i = 0; i < size; i++) {
            CB_Cat_Trusted_Str(json, "\n", 1);
            S_cat_whitespace(json, depth + 1);
            if (!S_to_json(VA_Fetch(array, i), json, depth + 1)) {
                return false;
            }
            if (i + 1 < size) {
                CB_Cat_Trusted_Str(json, ",", 1);
            }
        }
        CB_Cat_Trusted_Str(json, "\n", 1);
        S_cat_whitespace(json, depth);
        CB_Cat_Trusted_Str(json, "]", 1);
    }
    else if (Obj_Is_A(dump, HASH)) {
        Hash *hash = (Hash*)dump;
        size_t size = Hash_Get_Size(hash);

        // Put empty hash on single line.
        if (size == 0) {
            CB_Cat_Trusted_Str(json, "{}", 2);
            return true;
        }

        // Validate that all keys are strings, then sort.
        VArray *keys = Hash_Keys(hash);
        for (size_t i = 0; i < size; i++) {
            Obj *key = VA_Fetch(keys, i);
            if (!key || !Obj_Is_A(key, CHARBUF)) {
                DECREF(keys);
                CharBuf *key_class = key ? Obj_Get_Class_Name(key) : NULL;
                CharBuf *mess = MAKE_MESS("Illegal key type: %o", key_class);
                Err_set_error(Err_new(mess));
                return false;
            }
        }
        VA_Sort(keys, NULL, NULL);

        // Spread pairs across multiple lines.
        CB_Cat_Trusted_Str(json, "{", 1);
        for (size_t i = 0; i < size; i++) {
            Obj *key = VA_Fetch(keys, i);
            CB_Cat_Trusted_Str(json, "\n", 1);
            S_cat_whitespace(json, depth + 1);
            S_append_json_string(key, json);
            CB_Cat_Trusted_Str(json, ": ", 2);
            if (!S_to_json(Hash_Fetch(hash, key), json, depth + 1)) {
                DECREF(keys);
                return false;
            }
            if (i + 1 < size) {
                CB_Cat_Trusted_Str(json, ",", 1);
            }
        }
        CB_Cat_Trusted_Str(json, "\n", 1);
        S_cat_whitespace(json, depth);
        CB_Cat_Trusted_Str(json, "}", 1);
        DECREF(keys);
    }

    return true;
}
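// Hedged usage sketch (not from the source; assumes the public Json_to_json()
// wrapper that Json.c builds on top of S_to_json(), and the hash contents
// are invented for illustration).
static void
S_to_json_demo(void) {
    Hash *dump = Hash_new(0);
    Hash_Store_Str(dump, "format", 6, (Obj*)Int64_new(2));
    Hash_Store_Str(dump, "entries", 7, (Obj*)VA_new(0));
    CharBuf *json = Json_to_json((Obj*)dump);
    // Keys come out sorted, the empty array prints as "[]" on one line,
    // and nested containers are indented via S_cat_whitespace().
    DECREF(json);
    DECREF(dump);
}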