static void
test_stemming(TestBatchRunner *runner) {
    FSFolder *modules_folder = TestUtils_modules_folder();
    String *path = Str_newf("analysis/snowstem/source/test/tests.json");
    Hash *tests = (Hash*)Json_slurp_json((Folder*)modules_folder, path);
    if (!tests) { RETHROW(Err_get_error()); }

    String *iso;
    Hash   *lang_data;
    Hash_Iterate(tests);
    while (Hash_Next(tests, (Obj**)&iso, (Obj**)&lang_data)) {
        VArray *words = (VArray*)Hash_Fetch_Utf8(lang_data, "words", 5);
        VArray *stems = (VArray*)Hash_Fetch_Utf8(lang_data, "stems", 5);
        SnowballStemmer *stemmer = SnowStemmer_new(iso);

        for (uint32_t i = 0, max = VA_Get_Size(words); i < max; i++) {
            String *word = (String*)VA_Fetch(words, i);
            VArray *got  = SnowStemmer_Split(stemmer, word);
            String *stem = (String*)VA_Fetch(got, 0);
            TEST_TRUE(runner,
                      stem
                      && Str_Is_A(stem, STRING)
                      && Str_Equals(stem, VA_Fetch(stems, i)),
                      "Stem %s: %s", Str_Get_Ptr8(iso), Str_Get_Ptr8(word));
            DECREF(got);
        }
        DECREF(stemmer);
    }

    DECREF(tests);
    DECREF(modules_folder);
    DECREF(path);
}

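/* For reference, test_stemming() above expects tests.json to be a hash of
 * ISO language codes, each mapping to parallel word/stem arrays -- a sketch
 * of the layout (values illustrative, not from the actual test data):
 *
 *     {
 *         "en": { "words": ["books", ...], "stems": ["book", ...] },
 *         "de": { "words": [...],          "stems": [...] }
 *     }
 *
 * The stemmer's output for words[i] must equal stems[i].  A minimal usage
 * sketch (hypothetical function; calls taken from the test above) shows the
 * same round trip outside the harness -- for English, "books" should stem
 * to "book": */
static void
example_stem_one_word(void) {
    String *iso  = Str_newf("en");
    String *word = Str_newf("books");
    SnowballStemmer *stemmer = SnowStemmer_new(iso);
    VArray *got  = SnowStemmer_Split(stemmer, word);   // one stemmed token
    String *stem = (String*)VA_Fetch(got, 0);          // expected: "book"
    UNUSED_VAR(stem);
    DECREF(got);
    DECREF(stemmer);
    DECREF(word);
    DECREF(iso);
}
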
Obj*
RangeQuery_Load_IMP(RangeQuery *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    RangeQuery_Load_t super_load
        = SUPER_METHOD_PTR(RANGEQUERY, LUCY_RangeQuery_Load);
    RangeQuery *loaded = (RangeQuery*)super_load(self, dump);
    RangeQueryIVARS *loaded_ivars = RangeQuery_IVARS(loaded);

    Obj *field = CERTIFY(Hash_Fetch_Utf8(source, "field", 5), OBJ);
    loaded_ivars->field = (String*)CERTIFY(Freezer_load(field), STRING);
    Obj *lower_term = Hash_Fetch_Utf8(source, "lower_term", 10);
    if (lower_term) {
        loaded_ivars->lower_term
            = (Obj*)CERTIFY(Freezer_load(lower_term), OBJ);
    }
    Obj *upper_term = Hash_Fetch_Utf8(source, "upper_term", 10);
    if (upper_term) {
        loaded_ivars->upper_term
            = (Obj*)CERTIFY(Freezer_load(upper_term), OBJ);
    }
    Obj *include_lower
        = CERTIFY(Hash_Fetch_Utf8(source, "include_lower", 13), OBJ);
    loaded_ivars->include_lower = Json_obj_to_bool(include_lower);
    Obj *include_upper
        = CERTIFY(Hash_Fetch_Utf8(source, "include_upper", 13), OBJ);
    loaded_ivars->include_upper = Json_obj_to_bool(include_upper);

    return (Obj*)loaded;
}

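/* The dump hash consumed by RangeQuery_Load_IMP() carries "field" plus an
 * optional "lower_term"/"upper_term" pair (an open-ended range omits one of
 * them) and the mandatory "include_lower"/"include_upper" booleans.  Sketch
 * of one possible dump (field name and values illustrative):
 *
 *     { "field": "price", "lower_term": 10,
 *       "include_lower": true, "include_upper": false }
 */
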
BlobType*
BlobType_Load_IMP(BlobType *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    String *class_name = (String*)Hash_Fetch_Utf8(source, "_class", 6);
    VTable *vtable
        = (class_name != NULL && Obj_Is_A((Obj*)class_name, STRING))
          ? VTable_singleton(class_name, NULL)
          : BLOBTYPE;
    BlobType *loaded = (BlobType*)VTable_Make_Obj(vtable);
    Obj *boost_dump   = Hash_Fetch_Utf8(source, "boost", 5);
    Obj *indexed_dump = Hash_Fetch_Utf8(source, "indexed", 7);
    Obj *stored_dump  = Hash_Fetch_Utf8(source, "stored", 6);
    UNUSED_VAR(self);

    BlobType_init(loaded, false);
    BlobTypeIVARS *const loaded_ivars = BlobType_IVARS(loaded);
    if (boost_dump)   { loaded_ivars->boost   = (float)Obj_To_F64(boost_dump); }
    if (indexed_dump) { loaded_ivars->indexed = Obj_To_Bool(indexed_dump); }
    if (stored_dump)  { loaded_ivars->stored  = Obj_To_Bool(stored_dump); }

    return loaded;
}

InStream*
CFReader_Local_Open_In_IMP(CompoundFileReader *self, String *name) {
    CompoundFileReaderIVARS *const ivars = CFReader_IVARS(self);
    Hash *entry = (Hash*)Hash_Fetch(ivars->records, name);

    if (!entry) {
        InStream *instream = Folder_Local_Open_In(ivars->real_folder, name);
        if (!instream) {
            ERR_ADD_FRAME(Err_get_error());
        }
        return instream;
    }
    else {
        Obj *len    = Hash_Fetch_Utf8(entry, "length", 6);
        Obj *offset = Hash_Fetch_Utf8(entry, "offset", 6);
        if (!len || !offset) {
            Err_set_error(Err_new(Str_newf("Malformed entry for '%o' in '%o'",
                                           name,
                                           Folder_Get_Path(ivars->real_folder))));
            return NULL;
        }
        else if (Str_Get_Size(ivars->path)) {
            String *fullpath = Str_newf("%o/%o", ivars->path, name);
            InStream *instream = InStream_Reopen(ivars->instream, fullpath,
                                                 Obj_To_I64(offset),
                                                 Obj_To_I64(len));
            DECREF(fullpath);
            return instream;
        }
        else {
            return InStream_Reopen(ivars->instream, name,
                                   Obj_To_I64(offset), Obj_To_I64(len));
        }
    }
}

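/* Each entry in ivars->records maps a virtual file name to its byte range
 * inside the compound file, so opening a member file reduces to reopening
 * the shared InStream over that slice; names with no record fall through to
 * the real folder.  Sketch of one record (name and numbers illustrative):
 *
 *     "documents.dat": { "offset": 1024, "length": 5000 }
 */
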
bool
LFLock_Maybe_Delete_File_IMP(LockFileLock *self, String *path,
                             bool delete_mine, bool delete_other) {
    LockFileLockIVARS *const ivars = LFLock_IVARS(self);
    Folder *folder  = ivars->folder;
    bool    success = false;

    // Only delete locks that start with our lock name.
    if (!Str_Starts_With_Utf8(path, "locks", 5)) {
        return false;
    }
    StringIterator *iter = Str_Top(path);
    StrIter_Advance(iter, 5 + 1);   // Skip "locks/".
    if (!StrIter_Starts_With(iter, ivars->name)) {
        DECREF(iter);
        return false;
    }
    DECREF(iter);

    // Attempt to delete dead lock file.
    if (Folder_Exists(folder, path)) {
        Hash *hash = (Hash*)Json_slurp_json(folder, path);
        if (hash != NULL && Obj_Is_A((Obj*)hash, HASH)) {
            String *pid_buf = (String*)Hash_Fetch_Utf8(hash, "pid", 3);
            String *host    = (String*)Hash_Fetch_Utf8(hash, "host", 4);
            String *name    = (String*)Hash_Fetch_Utf8(hash, "name", 4);

            // Match hostname and lock name.
            if (host != NULL
                && Str_Is_A(host, STRING)
                && Str_Equals(host, (Obj*)ivars->host)
                && name != NULL
                && Str_Is_A(name, STRING)
                && Str_Equals(name, (Obj*)ivars->name)
                && pid_buf != NULL
                && Str_Is_A(pid_buf, STRING)
               ) {
                // Verify that pid is either mine or dead.
                int pid = (int)Str_To_I64(pid_buf);
                if ((delete_mine && pid == PID_getpid())   // This process.
                    || (delete_other && !PID_active(pid))  // Dead pid.
                   ) {
                    if (Folder_Delete(folder, path)) {
                        success = true;
                    }
                    else {
                        String *mess = MAKE_MESS("Can't delete '%o'", path);
                        DECREF(hash);
                        Err_throw_mess(ERR, mess);
                    }
                }
            }
        }
        DECREF(hash);
    }

    return success;
}

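/* A lock file under "locks/" holds a small JSON hash identifying its owner;
 * the checks above delete it only when host and lock name match and the
 * recorded pid is ours (delete_mine) or no longer running (delete_other).
 * Sketch of the payload (values illustrative):
 *
 *     { "pid": "1234", "host": "indexer-01", "name": "write" }
 */
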
DefaultSortReader*
DefSortReader_init(DefaultSortReader *self, Schema *schema, Folder *folder,
                   Snapshot *snapshot, VArray *segments, int32_t seg_tick) {
    DataReader_init((DataReader*)self, schema, folder, snapshot, segments,
                    seg_tick);
    DefaultSortReaderIVARS *const ivars = DefSortReader_IVARS(self);
    Segment *segment = DefSortReader_Get_Segment(self);
    Hash *metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "sort", 4);

    // Check format.
    ivars->format = 0;
    if (metadata) {
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);
        if (!format) {
            THROW(ERR, "Missing 'format' var");
        }
        else {
            ivars->format = (int32_t)Obj_To_I64(format);
            if (ivars->format < 2 || ivars->format > 3) {
                THROW(ERR, "Unsupported sort cache format: %i32",
                      ivars->format);
            }
        }
    }

    // Init.
    ivars->caches = Hash_new(0);

    // Either extract or fake up the "counts", "null_ords", and "ord_widths"
    // hashes.
    if (metadata) {
        ivars->counts
            = (Hash*)INCREF(CERTIFY(Hash_Fetch_Utf8(metadata, "counts", 6),
                                    HASH));
        ivars->null_ords = (Hash*)Hash_Fetch_Utf8(metadata, "null_ords", 9);
        if (ivars->null_ords) {
            ivars->null_ords = (Hash*)INCREF(CERTIFY(ivars->null_ords, HASH));
        }
        else {
            ivars->null_ords = Hash_new(0);
        }
        ivars->ord_widths = (Hash*)Hash_Fetch_Utf8(metadata, "ord_widths", 10);
        if (ivars->ord_widths) {
            ivars->ord_widths = (Hash*)INCREF(CERTIFY(ivars->ord_widths, HASH));
        }
        else {
            ivars->ord_widths = Hash_new(0);
        }
    }
    else {
        ivars->counts     = Hash_new(0);
        ivars->null_ords  = Hash_new(0);
        ivars->ord_widths = Hash_new(0);
    }

    return self;
}

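/* Note that only "counts" is mandatory when "sort" metadata is present;
 * "null_ords" and "ord_widths" are optional, and faking up empty hashes for
 * the missing ones (and for segments with no sort metadata at all) lets
 * downstream code fetch from all three unconditionally, without NULL
 * checks. */
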
Obj*
Query_Load_IMP(Query *self, Obj *dump) {
    CHY_UNUSED_VAR(self);
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    String *class_name
        = (String*)CERTIFY(Hash_Fetch_Utf8(source, "_class", 6), STRING);
    VTable *vtable = VTable_singleton(class_name, NULL);
    Query *loaded = (Query*)VTable_Make_Obj(vtable);
    Obj *boost = CERTIFY(Hash_Fetch_Utf8(source, "boost", 5), OBJ);
    Query_IVARS(loaded)->boost = (float)Obj_To_F64(boost);
    return (Obj*)loaded;
}

Obj*
TermQuery_Load_IMP(TermQuery *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    TermQuery_Load_t super_load
        = SUPER_METHOD_PTR(TERMQUERY, LUCY_TermQuery_Load);
    TermQuery *loaded = (TermQuery*)super_load(self, dump);
    TermQueryIVARS *loaded_ivars = TermQuery_IVARS(loaded);
    Obj *field = CERTIFY(Hash_Fetch_Utf8(source, "field", 5), OBJ);
    loaded_ivars->field = (String*)CERTIFY(Freezer_load(field), STRING);
    Obj *term = CERTIFY(Hash_Fetch_Utf8(source, "term", 4), OBJ);
    loaded_ivars->term = (Obj*)CERTIFY(Freezer_load(term), OBJ);
    return (Obj*)loaded;
}

Snapshot*
Snapshot_Read_File_IMP(Snapshot *self, Folder *folder, String *path) {
    SnapshotIVARS *const ivars = Snapshot_IVARS(self);

    // Eliminate all prior data.  Pick a snapshot file.
    S_zero_out(self);
    ivars->path = (path != NULL && Str_Get_Size(path) > 0)
                  ? Str_Clone(path)
                  : IxFileNames_latest_snapshot(folder);

    if (ivars->path) {
        Hash *snap_data
            = (Hash*)CERTIFY(Json_slurp_json(folder, ivars->path), HASH);
        Obj *format_obj
            = CERTIFY(Hash_Fetch_Utf8(snap_data, "format", 6), OBJ);
        int32_t format = (int32_t)Json_obj_to_i64(format_obj);
        Obj *subformat_obj = Hash_Fetch_Utf8(snap_data, "subformat", 9);
        int32_t subformat = subformat_obj
                            ? (int32_t)Json_obj_to_i64(subformat_obj)
                            : 0;

        // Verify that we can read the index properly.
        if (format > Snapshot_current_file_format) {
            THROW(ERR, "Snapshot format too recent: %i32, %i32", format,
                  Snapshot_current_file_format);
        }

        // Build up list of entries.
        Vector *list = (Vector*)INCREF(CERTIFY(
                           Hash_Fetch_Utf8(snap_data, "entries", 7),
                           VECTOR));
        if (format == 1 || (format == 2 && subformat < 1)) {
            Vector *cleaned = S_clean_segment_contents(list);
            DECREF(list);
            list = cleaned;
        }
        Hash_Clear(ivars->entries);
        for (uint32_t i = 0, max = Vec_Get_Size(list); i < max; i++) {
            String *entry = (String*)CERTIFY(Vec_Fetch(list, i), STRING);
            Hash_Store(ivars->entries, entry, (Obj*)CFISH_TRUE);
        }

        DECREF(list);
        DECREF(snap_data);
    }

    return self;
}

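/* A snapshot JSON file as read above carries a "format" number (with an
 * optional "subformat" for in-series revisions) plus an "entries" array
 * listing the paths belonging to that index revision.  Sketch
 * (illustrative):
 *
 *     { "format": 2, "subformat": 1,
 *       "entries": ["seg_1", "schema_1.json", ...] }
 */
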
Obj*
ProximityQuery_Load_IMP(ProximityQuery *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    ProximityQuery_Load_t super_load
        = SUPER_METHOD_PTR(PROXIMITYQUERY, LUCY_ProximityQuery_Load);
    ProximityQuery *loaded = (ProximityQuery*)super_load(self, dump);
    ProximityQueryIVARS *loaded_ivars = ProximityQuery_IVARS(loaded);
    Obj *field = CERTIFY(Hash_Fetch_Utf8(source, "field", 5), OBJ);
    loaded_ivars->field = (String*)CERTIFY(Freezer_load(field), STRING);
    Obj *terms = CERTIFY(Hash_Fetch_Utf8(source, "terms", 5), OBJ);
    loaded_ivars->terms = (VArray*)CERTIFY(Freezer_load(terms), VARRAY);
    Obj *within = CERTIFY(Hash_Fetch_Utf8(source, "within", 6), OBJ);
    loaded_ivars->within = (uint32_t)Obj_To_I64(within);
    return (Obj*)loaded;
}

Obj*
PhraseQuery_Load_IMP(PhraseQuery *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    PhraseQuery_Load_t super_load
        = SUPER_METHOD_PTR(PHRASEQUERY, LUCY_PhraseQuery_Load);
    PhraseQuery *loaded = (PhraseQuery*)super_load(self, dump);
    PhraseQueryIVARS *loaded_ivars = PhraseQuery_IVARS(loaded);
    Obj *field = CERTIFY(Hash_Fetch_Utf8(source, "field", 5), OBJ);
    loaded_ivars->field = (String*)CERTIFY(Freezer_load(field), STRING);
    Obj *terms = CERTIFY(Hash_Fetch_Utf8(source, "terms", 5), OBJ);
    loaded_ivars->terms = (Vector*)CERTIFY(Freezer_load(terms), VECTOR);
    return (Obj*)loaded;
}

Obj*
LeafQuery_Load_IMP(LeafQuery *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    LeafQuery_Load_t super_load
        = SUPER_METHOD_PTR(LEAFQUERY, LUCY_LeafQuery_Load);
    LeafQuery *loaded = (LeafQuery*)super_load(self, dump);
    LeafQueryIVARS *loaded_ivars = LeafQuery_IVARS(loaded);
    Obj *field = Hash_Fetch_Utf8(source, "field", 5);
    if (field) {
        loaded_ivars->field = (String*)CERTIFY(Freezer_load(field), STRING);
    }
    Obj *text = CERTIFY(Hash_Fetch_Utf8(source, "text", 4), OBJ);
    loaded_ivars->text = (String*)CERTIFY(Freezer_load(text), STRING);
    return (Obj*)loaded;
}

void
DefDelWriter_Merge_Segment_IMP(DefaultDeletionsWriter *self,
                               SegReader *reader, I32Array *doc_map) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    UNUSED_VAR(doc_map);
    Segment *segment = SegReader_Get_Segment(reader);
    Hash *del_meta = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "deletions", 9);

    if (del_meta) {
        Vector *seg_readers = ivars->seg_readers;
        Hash *files = (Hash*)Hash_Fetch_Utf8(del_meta, "files", 5);
        if (files) {
            HashIterator *iter = HashIter_new(files);
            while (HashIter_Next(iter)) {
                String *seg       = HashIter_Get_Key(iter);
                Hash   *mini_meta = (Hash*)HashIter_Get_Value(iter);

                /* Find the segment the deletions from the SegReader we're
                 * adding correspond to.  If it's gone, we don't need to
                 * worry about losing deletions files that point at it. */
                for (size_t i = 0, max = Vec_Get_Size(seg_readers);
                     i < max; i++
                    ) {
                    SegReader *candidate
                        = (SegReader*)Vec_Fetch(seg_readers, i);
                    String *candidate_name
                        = Seg_Get_Name(SegReader_Get_Segment(candidate));
                    if (Str_Equals(seg, (Obj*)candidate_name)) {
                        /* If the count hasn't changed, we're about to merge
                         * away the most recent deletions file pointing at
                         * this target segment -- so force a new file to be
                         * written out. */
                        int32_t count = (int32_t)Json_obj_to_i64(
                                            Hash_Fetch_Utf8(mini_meta,
                                                            "count", 5));
                        DeletionsReader *del_reader
                            = (DeletionsReader*)SegReader_Obtain(
                                  candidate,
                                  Class_Get_Name(DELETIONSREADER));
                        if (count == DelReader_Del_Count(del_reader)) {
                            ivars->updated[i] = true;
                        }
                        break;
                    }
                }
            }
            DECREF(iter);
        }
    }
}

bool
Seg_Read_File_IMP(Segment *self, Folder *folder) {
    SegmentIVARS *const ivars = Seg_IVARS(self);
    String *filename = Str_newf("%o/segmeta.json", ivars->name);
    Hash *metadata = (Hash*)Json_slurp_json(folder, filename);
    Hash *my_metadata;

    // Bail unless the segmeta file was read successfully.
    DECREF(filename);
    if (!metadata) { return false; }
    CERTIFY(metadata, HASH);

    // Grab metadata for the Segment object itself.
    DECREF(ivars->metadata);
    ivars->metadata = metadata;
    my_metadata = (Hash*)CERTIFY(
                      Hash_Fetch_Utf8(ivars->metadata, "segmeta", 7), HASH);

    // Assign.
    Obj *count = Hash_Fetch_Utf8(my_metadata, "count", 5);
    if (!count) {
        count = Hash_Fetch_Utf8(my_metadata, "doc_count", 9);
    }
    if (!count) { THROW(ERR, "Missing 'count'"); }
    else        { ivars->count = Json_obj_to_i64(count); }

    // Get list of field nums.
    Vector *source_by_num
        = (Vector*)Hash_Fetch_Utf8(my_metadata, "field_names", 11);
    size_t num_fields = source_by_num ? Vec_Get_Size(source_by_num) : 0;
    if (source_by_num == NULL) {
        THROW(ERR, "Failed to extract 'field_names' from metadata");
    }

    // Init.
    DECREF(ivars->by_num);
    DECREF(ivars->by_name);
    ivars->by_num  = Vec_new(num_fields);
    ivars->by_name = Hash_new(num_fields);

    // Copy the list of fields from the source.
    for (size_t i = 0; i < num_fields; i++) {
        String *name = (String*)Vec_Fetch(source_by_num, i);
        Seg_Add_Field(self, name);
    }

    return true;
}

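/* segmeta.json nests the Segment's own data under a "segmeta" key alongside
 * the per-component metadata blocks that other readers fetch via
 * Seg_Fetch_Metadata_Utf8() ("documents", "deletions", "sort", and so on);
 * "doc_count" is accepted as an alternate spelling of "count".  Sketch of
 * the portion read above (values illustrative):
 *
 *     { "segmeta": { "count": 1000,
 *                    "field_names": ["title", "content", ...] },
 *       ... }
 */
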
SnowballStemmer*
SnowStemmer_Load_IMP(SnowballStemmer *self, Obj *dump) {
    SnowStemmer_Load_t super_load
        = SUPER_METHOD_PTR(SNOWBALLSTEMMER, LUCY_SnowStemmer_Load);
    SnowballStemmer *loaded = super_load(self, dump);
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    String *language
        = (String*)CERTIFY(Hash_Fetch_Utf8(source, "language", 8), STRING);
    return SnowStemmer_init(loaded, language);
}

EasyAnalyzer*
EasyAnalyzer_Load_IMP(EasyAnalyzer *self, Obj *dump) {
    EasyAnalyzer_Load_t super_load
        = SUPER_METHOD_PTR(EASYANALYZER, LUCY_EasyAnalyzer_Load);
    EasyAnalyzer *loaded = super_load(self, dump);
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    String *language
        = (String*)CERTIFY(Hash_Fetch_Utf8(source, "language", 8), STRING);
    return EasyAnalyzer_init(loaded, language);
}

RegexTokenizer*
RegexTokenizer_Load_IMP(RegexTokenizer *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    RegexTokenizer_Load_t super_load
        = SUPER_METHOD_PTR(REGEXTOKENIZER, LUCY_RegexTokenizer_Load);
    RegexTokenizer *loaded = super_load(self, dump);
    String *pattern
        = (String*)CERTIFY(Hash_Fetch_Utf8(source, "pattern", 7), STRING);
    return RegexTokenizer_init(loaded, pattern);
}

static void
test_normalization(TestBatchRunner *runner) {
    FSFolder *modules_folder = TestUtils_modules_folder();
    if (modules_folder == NULL) {
        SKIP(runner, 13, "Can't locate test data");
        return;
    }

    String *path = Str_newf("unicode/utf8proc/tests.json");
    Vector *tests = (Vector*)Json_slurp_json((Folder*)modules_folder, path);
    if (!tests) { RETHROW(Err_get_error()); }

    for (uint32_t i = 0, max = Vec_Get_Size(tests); i < max; i++) {
        Hash *test = (Hash*)Vec_Fetch(tests, i);
        String *form = (String*)Hash_Fetch_Utf8(
                           test, "normalization_form", 18);
        bool case_fold = Bool_Get_Value((Boolean*)Hash_Fetch_Utf8(
                                            test, "case_fold", 9));
        bool strip_accents = Bool_Get_Value((Boolean*)Hash_Fetch_Utf8(
                                                test, "strip_accents", 13));
        Normalizer *normalizer
            = Normalizer_new(form, case_fold, strip_accents);
        Vector *words = (Vector*)Hash_Fetch_Utf8(test, "words", 5);
        Vector *norms = (Vector*)Hash_Fetch_Utf8(test, "norms", 5);

        for (uint32_t j = 0, max = Vec_Get_Size(words); j < max; j++) {
            String *word = (String*)Vec_Fetch(words, j);
            Vector *got  = Normalizer_Split(normalizer, word);
            String *norm = (String*)Vec_Fetch(got, 0);
            TEST_TRUE(runner,
                      norm
                      && Str_is_a(norm, STRING)
                      && Str_Equals(norm, Vec_Fetch(norms, j)),
                      "Normalize %s %d %d: %s", Str_Get_Ptr8(form),
                      case_fold, strip_accents, Str_Get_Ptr8(word));
            DECREF(got);
        }

        DECREF(normalizer);
    }

    DECREF(tests);
    DECREF(modules_folder);
    DECREF(path);
}

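/* Unlike the stemming data, the utf8proc tests.json read above is an array:
 * each element pairs one Normalizer configuration with parallel
 * input/output word lists.  Sketch of a single element (values
 * illustrative):
 *
 *     { "normalization_form": "NFKC", "case_fold": true,
 *       "strip_accents": false, "words": [...], "norms": [...] }
 */
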
DefaultDocReader*
DefDocReader_init(DefaultDocReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, Vector *segments, int32_t seg_tick) {
    Hash *metadata;
    Segment *segment;
    DocReader_init((DocReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    DefaultDocReaderIVARS *const ivars = DefDocReader_IVARS(self);
    segment  = DefDocReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "documents", 9);

    if (metadata) {
        String *seg_name = Seg_Get_Name(segment);
        String *ix_file  = Str_newf("%o/documents.ix", seg_name);
        String *dat_file = Str_newf("%o/documents.dat", seg_name);
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);

        // Check format.
        if (!format) {
            THROW(ERR, "Missing 'format' var");
        }
        else {
            int64_t format_val = Json_obj_to_i64(format);
            if (format_val < DocWriter_current_file_format) {
                THROW(ERR, "Obsolete doc storage format %i64; "
                      "Index regeneration is required", format_val);
            }
            else if (format_val != DocWriter_current_file_format) {
                THROW(ERR, "Unsupported doc storage format: %i64",
                      format_val);
            }
        }

        // Get streams.
        if (Folder_Exists(folder, ix_file)) {
            ivars->ix_in = Folder_Open_In(folder, ix_file);
            if (!ivars->ix_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
            ivars->dat_in = Folder_Open_In(folder, dat_file);
            if (!ivars->dat_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
        }
        DECREF(ix_file);
        DECREF(dat_file);
    }

    return self;
}

Obj*
PolyQuery_Load_IMP(PolyQuery *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    PolyQuery_Load_t super_load
        = SUPER_METHOD_PTR(POLYQUERY, LUCY_PolyQuery_Load);
    PolyQuery *loaded = (PolyQuery*)super_load(self, dump);
    Obj *children = CERTIFY(Hash_Fetch_Utf8(source, "children", 8), OBJ);
    PolyQuery_IVARS(loaded)->children
        = (Vector*)CERTIFY(Freezer_load(children), VECTOR);
    return (Obj*)loaded;
}

HitDoc*
HitDoc_Load_IMP(HitDoc *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    HitDoc_Load_t super_load = SUPER_METHOD_PTR(HITDOC, LUCY_HitDoc_Load);
    HitDoc *loaded = super_load(self, dump);
    HitDocIVARS *const loaded_ivars = HitDoc_IVARS(loaded);
    Obj *score = CERTIFY(Hash_Fetch_Utf8(source, "score", 5), OBJ);
    loaded_ivars->score = (float)Obj_To_F64(score);
    return loaded;
}

static void
test_offsets(TestBatchRunner *runner) {
    Folder *folder = S_folder_with_contents();
    CompoundFileWriter *cf_writer = CFWriter_new(folder);
    Hash *cf_metadata;
    Hash *files;

    CFWriter_Consolidate(cf_writer);

    cf_metadata = (Hash*)CERTIFY(
                      Json_slurp_json(folder, cfmeta_file), HASH);
    files = (Hash*)CERTIFY(
                Hash_Fetch_Utf8(cf_metadata, "files", 5), HASH);

    bool offsets_ok = true;

    TEST_TRUE(runner, Hash_Get_Size(files) > 0, "Multiple files");

    HashIterator *iter = HashIter_new(files);
    while (HashIter_Next(iter)) {
        String *file   = HashIter_Get_Key(iter);
        Hash   *stats  = (Hash*)CERTIFY(HashIter_Get_Value(iter), HASH);
        Obj    *offset = CERTIFY(Hash_Fetch_Utf8(stats, "offset", 6), OBJ);
        int64_t offs   = Obj_To_I64(offset);
        if (offs % 8 != 0) {
            offsets_ok = false;
            // Pass the extracted int64_t, not the Obj*, to match PRId64.
            FAIL(runner, "Offset %" PRId64 " for %s not a multiple of 8",
                 offs, Str_Get_Ptr8(file));
            break;
        }
    }
    DECREF(iter);

    if (offsets_ok) {
        PASS(runner, "All offsets are multiples of 8");
    }

    DECREF(cf_metadata);
    DECREF(cf_writer);
    DECREF(folder);
}

DefaultHighlightReader*
DefHLReader_init(DefaultHighlightReader *self, Schema *schema,
                 Folder *folder, Snapshot *snapshot, Vector *segments,
                 int32_t seg_tick) {
    HLReader_init((HighlightReader*)self, schema, folder, snapshot,
                  segments, seg_tick);
    DefaultHighlightReaderIVARS *const ivars = DefHLReader_IVARS(self);
    Segment *segment = DefHLReader_Get_Segment(self);
    Hash *metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "highlight", 9);
    if (!metadata) {
        metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment,
                                                  "term_vectors", 12);
    }

    // Check format.
    if (metadata) {
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);
        if (!format) {
            THROW(ERR, "Missing 'format' var");
        }
        else {
            if (Json_obj_to_i64(format) != HLWriter_current_file_format) {
                THROW(ERR, "Unsupported highlight data format: %i64",
                      Json_obj_to_i64(format));
            }
        }
    }

    // Open instreams.
    String *seg_name = Seg_Get_Name(segment);
    String *ix_file  = Str_newf("%o/highlight.ix", seg_name);
    String *dat_file = Str_newf("%o/highlight.dat", seg_name);
    if (Folder_Exists(folder, ix_file)) {
        ivars->ix_in = Folder_Open_In(folder, ix_file);
        if (!ivars->ix_in) {
            Err *error = (Err*)INCREF(Err_get_error());
            DECREF(ix_file);
            DECREF(dat_file);
            DECREF(self);
            RETHROW(error);
        }
        ivars->dat_in = Folder_Open_In(folder, dat_file);
        if (!ivars->dat_in) {
            Err *error = (Err*)INCREF(Err_get_error());
            DECREF(ix_file);
            DECREF(dat_file);
            DECREF(self);
            RETHROW(error);
        }
    }
    DECREF(ix_file);
    DECREF(dat_file);

    return self;
}

StringType*
StringType_Load_IMP(StringType *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    String *class_name = (String*)Hash_Fetch_Utf8(source, "_class", 6);
    VTable *vtable
        = (class_name != NULL && Obj_Is_A((Obj*)class_name, STRING))
          ? VTable_singleton(class_name, NULL)
          : STRINGTYPE;
    StringType *loaded = (StringType*)VTable_Make_Obj(vtable);
    Obj *boost_dump    = Hash_Fetch_Utf8(source, "boost", 5);
    Obj *indexed_dump  = Hash_Fetch_Utf8(source, "indexed", 7);
    Obj *stored_dump   = Hash_Fetch_Utf8(source, "stored", 6);
    Obj *sortable_dump = Hash_Fetch_Utf8(source, "sortable", 8);
    UNUSED_VAR(self);

    float boost    = boost_dump    ? (float)Obj_To_F64(boost_dump)  : 1.0f;
    bool  indexed  = indexed_dump  ? Obj_To_Bool(indexed_dump)      : true;
    bool  stored   = stored_dump   ? Obj_To_Bool(stored_dump)       : true;
    bool  sortable = sortable_dump ? Obj_To_Bool(sortable_dump)     : false;

    return StringType_init2(loaded, boost, indexed, stored, sortable);
}

Obj*
SnowStop_Load_IMP(SnowballStopFilter *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    SnowStop_Load_t super_load
        = SUPER_METHOD_PTR(SNOWBALLSTOPFILTER, LUCY_SnowStop_Load);
    SnowballStopFilter *loaded = (SnowballStopFilter*)super_load(self, dump);
    Obj *stoplist = Hash_Fetch_Utf8(source, "stoplist", 8);
    if (stoplist) {
        SnowStop_IVARS(loaded)->stoplist
            = (Hash*)CERTIFY(Freezer_load(stoplist), HASH);
    }
    return (Obj*)loaded;
}

PolyAnalyzer*
PolyAnalyzer_Load_IMP(PolyAnalyzer *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    PolyAnalyzer_Load_t super_load
        = SUPER_METHOD_PTR(POLYANALYZER, LUCY_PolyAnalyzer_Load);
    PolyAnalyzer *loaded = super_load(self, dump);
    VArray *analyzer_dumps
        = (VArray*)CERTIFY(Hash_Fetch_Utf8(source, "analyzers", 9), VARRAY);
    VArray *analyzers
        = (VArray*)CERTIFY(Freezer_load((Obj*)analyzer_dumps), VARRAY);
    PolyAnalyzer_init(loaded, NULL, analyzers);
    DECREF(analyzers);
    return loaded;
}

Inversion*
SnowStop_Transform_IMP(SnowballStopFilter *self, Inversion *inversion) {
    Token *token;
    Inversion *new_inversion = Inversion_new(NULL);
    SnowballStopFilterIVARS *const ivars = SnowStop_IVARS(self);
    Hash *const stoplist = ivars->stoplist;

    while (NULL != (token = Inversion_Next(inversion))) {
        TokenIVARS *const token_ivars = Token_IVARS(token);
        if (!Hash_Fetch_Utf8(stoplist, token_ivars->text,
                             token_ivars->len)) {
            Inversion_Append(new_inversion, (Token*)INCREF(token));
        }
    }

    return new_inversion;
}

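/* Note that the stoplist hash is used purely as a set: a token survives the
 * transform only if its text is absent from the hash, so the stored values
 * are irrelevant -- only key membership matters. */
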
static void
S_zap_dead_merge(FilePurger *self, Hash *candidates) {
    FilePurgerIVARS *const ivars = FilePurger_IVARS(self);
    IndexManager *manager = ivars->manager;
    Lock *merge_lock = IxManager_Make_Merge_Lock(manager);

    Lock_Clear_Stale(merge_lock);
    if (!Lock_Is_Locked(merge_lock)) {
        Hash *merge_data = IxManager_Read_Merge_Data(manager);
        Obj *cutoff = merge_data
                      ? Hash_Fetch_Utf8(merge_data, "cutoff", 6)
                      : NULL;

        if (cutoff) {
            String *cutoff_seg = Seg_num_to_name(Json_obj_to_i64(cutoff));
            if (Folder_Exists(ivars->folder, cutoff_seg)) {
                String *merge_json = SSTR_WRAP_UTF8("merge.json", 10);
                DirHandle *dh = Folder_Open_Dir(ivars->folder, cutoff_seg);

                if (!dh) {
                    THROW(ERR, "Can't open segment dir '%o'", cutoff_seg);
                }

                Hash_Store(candidates, cutoff_seg, (Obj*)CFISH_TRUE);
                Hash_Store(candidates, merge_json, (Obj*)CFISH_TRUE);

                while (DH_Next(dh)) {
                    // TODO: recursively delete subdirs within seg dir.
                    String *entry = DH_Get_Entry(dh);
                    String *filepath = Str_newf("%o/%o", cutoff_seg, entry);
                    Hash_Store(candidates, filepath, (Obj*)CFISH_TRUE);
                    DECREF(filepath);
                    DECREF(entry);
                }

                DECREF(dh);
            }
            DECREF(cutoff_seg);
        }

        DECREF(merge_data);
    }

    DECREF(merge_lock);
}

static Obj*
S_load_from_hash(Hash *dump) {
    String *class_name = (String*)Hash_Fetch_Utf8(dump, "_class", 6);

    // Assume that the presence of the "_class" key paired with a valid
    // class name indicates the output of a dump() rather than an ordinary
    // Hash.
    if (class_name && Str_is_a(class_name, STRING)) {
        Class *klass = Class_fetch_class(class_name);

        if (!klass) {
            String *parent_class_name = Class_find_parent_class(class_name);
            if (parent_class_name) {
                Class *parent = Class_singleton(parent_class_name, NULL);
                klass = Class_singleton(class_name, parent);
                DECREF(parent_class_name);
            }
            else {
                // TODO: Fix load() so that it works with ordinary hash keys
                // named "_class".
                THROW(ERR, "Can't find class '%o'", class_name);
            }
        }

        // Dispatch to an alternate Load() method.
        if (klass) {
            return S_load_via_load_method(klass, (Obj*)dump);
        }
    }

    // It's an ordinary Hash.
    Hash *loaded = Hash_new(Hash_Get_Size(dump));
    HashIterator *iter = HashIter_new(dump);
    while (HashIter_Next(iter)) {
        String *key   = HashIter_Get_Key(iter);
        Obj    *value = HashIter_Get_Value(iter);
        Hash_Store(loaded, key, Freezer_load(value));
    }
    DECREF(iter);

    return (Obj*)loaded;
}

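/* S_load_from_hash() is the dispatch point that routes dumps to the
 * Load_IMP methods above: any hash whose "_class" key names a loadable
 * class -- for example { "_class": "Lucy::Search::TermQuery",
 * "field": ..., "term": ... } -- is revived through that class's Load(),
 * while everything else is rebuilt recursively as a plain Hash. */
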
FullTextType*
FullTextType_Load_IMP(FullTextType *self, Obj *dump) {
    UNUSED_VAR(self);
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    String *class_name = (String*)Hash_Fetch_Utf8(source, "_class", 6);
    Class *klass
        = (class_name != NULL && Obj_is_a((Obj*)class_name, STRING))
          ? Class_singleton(class_name, NULL)
          : FULLTEXTTYPE;
    FullTextType *loaded = (FullTextType*)Class_Make_Obj(klass);

    // Extract boost.
    Obj *boost_dump = Hash_Fetch_Utf8(source, "boost", 5);
    float boost = boost_dump ? (float)Json_obj_to_f64(boost_dump) : 1.0f;

    // Find boolean properties.
    Obj *indexed_dump = Hash_Fetch_Utf8(source, "indexed", 7);
    Obj *stored_dump  = Hash_Fetch_Utf8(source, "stored", 6);
    Obj *sort_dump    = Hash_Fetch_Utf8(source, "sortable", 8);
    Obj *hl_dump      = Hash_Fetch_Utf8(source, "highlightable", 13);
    bool indexed  = indexed_dump ? Json_obj_to_bool(indexed_dump) : true;
    bool stored   = stored_dump  ? Json_obj_to_bool(stored_dump)  : true;
    bool sortable = sort_dump    ? Json_obj_to_bool(sort_dump)    : false;
    bool hl       = hl_dump      ? Json_obj_to_bool(hl_dump)      : false;

    // Extract an Analyzer.
    Obj *analyzer_dump = Hash_Fetch_Utf8(source, "analyzer", 8);
    Analyzer *analyzer = NULL;
    if (analyzer_dump) {
        if (Obj_is_a(analyzer_dump, ANALYZER)) {
            // Schema munged the dump and installed a shared analyzer.
            analyzer = (Analyzer*)INCREF(analyzer_dump);
        }
        else if (Obj_is_a(analyzer_dump, HASH)) {
            analyzer = (Analyzer*)Freezer_load(analyzer_dump);
        }
    }
    CERTIFY(analyzer, ANALYZER);

    FullTextType_init2(loaded, analyzer, boost, indexed, stored, sortable,
                       hl);
    DECREF(analyzer);
    return loaded;
}

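/* Taken together, the FieldType loaders above are tolerant of sparse dumps:
 * when a key is absent, StringType and FullTextType fall back to boost 1.0,
 * indexed/stored true, and sortable/highlightable false, while BlobType
 * takes its defaults from BlobType_init().  Only the "analyzer" entry of a
 * FullTextType dump is mandatory, so a minimal dump along the lines of
 * { "_class": "Lucy::Plan::FullTextType", "analyzer": { ... } } would be
 * enough to reconstruct a field type. */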