void
Hash_serialize(Hash *self, OutStream *outstream) {
    Obj *key;
    Obj *val;
    uint32_t num_charbufs = 0;

    OutStream_Write_C32(outstream, self->size);

    // Write CharBuf keys first.  CharBuf keys are the common case; grouping
    // them together is a form of run-length-encoding and saves space, since
    // we omit the per-key class name.  First pass: count them.
    Hash_Iterate(self);
    while (Hash_Next(self, &key, &val)) {
        num_charbufs += Obj_Is_A(key, CHARBUF) ? 1 : 0;
    }
    OutStream_Write_C32(outstream, num_charbufs);

    // Second pass: serialize the CharBuf-keyed pairs.
    Hash_Iterate(self);
    while (Hash_Next(self, &key, &val)) {
        if (Obj_Is_A(key, CHARBUF)) {
            Obj_Serialize(key, outstream);
            FREEZE(val, outstream);
        }
    }

    // Punt on the classes of the remaining keys.
    Hash_Iterate(self);
    while (Hash_Next(self, &key, &val)) {
        if (Obj_Is_A(key, CHARBUF)) { continue; }
        FREEZE(key, outstream);
        FREEZE(val, outstream);
    }
}
// Install a new set of SegReaders into the PolyReader: record per-segment
// doc-id offsets, then build one aggregated DataReader per component API
// spanning all segments.  Takes a new refcount on `sub_readers`; any
// previously installed sub_readers/offsets are released first.
static void
S_init_sub_readers(PolyReader *self, VArray *sub_readers) {
    PolyReaderIVARS *const ivars = PolyReader_IVARS(self);
    uint32_t num_sub_readers = VA_Get_Size(sub_readers);
    // Ownership of `starts` transfers to the I32Array via
    // I32Arr_new_steal() below -- do not free it here.
    int32_t *starts = (int32_t*)MALLOCATE(num_sub_readers * sizeof(int32_t));
    Hash *data_readers = Hash_new(0);

    DECREF(ivars->sub_readers);
    DECREF(ivars->offsets);
    ivars->sub_readers = (VArray*)INCREF(sub_readers);

    // Accumulate doc_max, subreader start offsets, and DataReaders.
    ivars->doc_max = 0;
    for (uint32_t i = 0; i < num_sub_readers; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(sub_readers, i);
        Hash *components = SegReader_Get_Components(seg_reader);
        CharBuf *api;
        DataReader *component;
        starts[i] = ivars->doc_max;
        ivars->doc_max += SegReader_Doc_Max(seg_reader);
        // Group each segment's component readers by API name, keeping
        // segment order via VA_Store at index `i`.
        Hash_Iterate(components);
        while (Hash_Next(components, (Obj**)&api, (Obj**)&component)) {
            VArray *readers = (VArray*)Hash_Fetch(data_readers, (Obj*)api);
            if (!readers) {
                readers = VA_new(num_sub_readers);
                Hash_Store(data_readers, (Obj*)api, (Obj*)readers);
            }
            VA_Store(readers, i, INCREF(component));
        }
    }
    ivars->offsets = I32Arr_new_steal(starts, num_sub_readers);

    // For each API, ask a representative (first non-NULL) reader to build
    // an aggregator over all segments.  A NULL aggregator means the
    // component opts out of aggregation.
    CharBuf *api;
    VArray *readers;
    Hash_Iterate(data_readers);
    while (Hash_Next(data_readers, (Obj**)&api, (Obj**)&readers)) {
        DataReader *datareader
            = (DataReader*)CERTIFY(S_first_non_null(readers), DATAREADER);
        DataReader *aggregator
            = DataReader_Aggregator(datareader, readers, ivars->offsets);
        if (aggregator) {
            CERTIFY(aggregator, DATAREADER);
            Hash_Store(ivars->components, (Obj*)api, (Obj*)aggregator);
        }
    }
    DECREF(data_readers);

    // Cache the total deletion count from the aggregated DeletionsReader,
    // if one was installed.
    DeletionsReader *del_reader = (DeletionsReader*)Hash_Fetch(
        ivars->components, (Obj*)VTable_Get_Name(DELETIONSREADER));
    ivars->del_count = del_reader ? DelReader_Del_Count(del_reader) : 0;
}
// Run the Snowball stemmer against the language test corpora bundled in
// modules/analysis/snowstem, verifying each word's first split token
// matches the expected stem.
static void
test_stemming(TestBatchRunner *runner) {
    FSFolder *modules_folder = TestUtils_modules_folder();
    String *path = Str_newf("analysis/snowstem/source/test/tests.json");
    Hash *tests = (Hash*)Json_slurp_json((Folder*)modules_folder, path);
    if (!tests) { RETHROW(Err_get_error()); }

    // Top-level hash maps ISO language code => { "words": [...], "stems": [...] }.
    String *iso;
    Hash *lang_data;
    Hash_Iterate(tests);
    while (Hash_Next(tests, (Obj**)&iso, (Obj**)&lang_data)) {
        VArray *inputs   = (VArray*)Hash_Fetch_Utf8(lang_data, "words", 5);
        VArray *expected = (VArray*)Hash_Fetch_Utf8(lang_data, "stems", 5);
        SnowballStemmer *stemmer = SnowStemmer_new(iso);
        uint32_t num_words = VA_Get_Size(inputs);
        for (uint32_t tick = 0; tick < num_words; tick++) {
            String *word = (String*)VA_Fetch(inputs, tick);
            VArray *got  = SnowStemmer_Split(stemmer, word);
            String *stem = (String*)VA_Fetch(got, 0);
            TEST_TRUE(runner,
                      stem
                      && Str_Is_A(stem, STRING)
                      && Str_Equals(stem, VA_Fetch(expected, tick)),
                      "Stem %s: %s", Str_Get_Ptr8(iso), Str_Get_Ptr8(word)
                     );
            DECREF(got);
        }
        DECREF(stemmer);
    }

    DECREF(tests);
    DECREF(modules_folder);
    DECREF(path);
}
// Two Hashes are equal iff they hold the same number of entries and every
// key in self maps to an Equals()-equivalent value in the other.
bool_t
Hash_equals(Hash *self, Obj *other) {
    // Identity implies equality.
    if ((Hash*)other == self)   { return true; }
    // Must be a Hash of the same size to possibly match.
    if (!Obj_Is_A(other, HASH)) { return false; }
    Hash *rival = (Hash*)other;
    if (self->size != rival->size) { return false; }

    // Sizes match, so a one-directional scan suffices.
    Obj *key;
    Obj *val;
    Hash_Iterate(self);
    while (Hash_Next(self, &key, &val)) {
        Obj *other_val = Hash_Fetch(rival, key);
        if (other_val == NULL || !Obj_Equals(other_val, val)) {
            return false;
        }
    }
    return true;
}
// Return a new VArray holding a refcounted copy of every value in the Hash.
// Caller takes ownership of the returned array.
VArray*
Hash_Values_IMP(Hash *self) {
    VArray *values = VA_new(self->size);
    Obj *key;
    Obj *val;
    Hash_Iterate(self);
    while (Hash_Next(self, &key, &val)) {
        VA_Push(values, INCREF(val));
    }
    return values;
}
// Exercise Hash_Keys(), Hash_Values(), the Iterate/Next protocol, and
// Find_Key() against a hash large enough to force multiple rebuilds.
static void
test_Keys_Values_Iter(TestBatch *batch) {
    Hash *hash = Hash_new(0); // trigger multiple rebuilds.
    VArray *expected = VA_new(100);
    VArray *keys;
    VArray *values;

    // Store each stringified number as both key and value, so the same
    // sorted `expected` array can validate keys and values alike.
    for (uint32_t i = 0; i < 500; i++) {
        CharBuf *cb = CB_newf("%u32", i);
        Hash_Store(hash, (Obj*)cb, (Obj*)cb);
        VA_Push(expected, INCREF(cb));
    }
    VA_Sort(expected, NULL, NULL);

    // Keys() and Values() return in hash order; sort before comparing.
    keys   = Hash_Keys(hash);
    values = Hash_Values(hash);
    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "Keys");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "Values");
    VA_Clear(keys);
    VA_Clear(values);

    // Same check, driven through the Iterate/Next protocol.
    {
        Obj *key;
        Obj *value;
        Hash_Iterate(hash);
        while (Hash_Next(hash, &key, &value)) {
            VA_Push(keys, INCREF(key));
            VA_Push(values, INCREF(value));
        }
    }
    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "Keys from Iter");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "Values from Iter");

    {
        ZombieCharBuf *forty = ZCB_WRAP_STR("40", 2);
        ZombieCharBuf *nope  = ZCB_WRAP_STR("nope", 4);
        Obj *key = Hash_Find_Key(hash, (Obj*)forty, ZCB_Hash_Sum(forty));
        TEST_TRUE(batch, Obj_Equals(key, (Obj*)forty), "Find_Key");
        // Fixed: this assignment was joined to the following TEST_TRUE by a
        // stray comma operator instead of being terminated with a semicolon.
        key = Hash_Find_Key(hash, (Obj*)nope, ZCB_Hash_Sum(nope));
        TEST_TRUE(batch, key == NULL,
                  "Find_Key returns NULL for non-existent key");
    }

    DECREF(hash);
    DECREF(expected);
    DECREF(keys);
    DECREF(values);
}
// Return a new VArray holding a refcounted copy of every key in the Hash.
// Caller takes ownership of the returned array.
VArray*
Hash_Keys_IMP(Hash *self) {
    VArray *keys = VA_new(self->size);
    Obj *key;
    Obj *val;
    Hash_Iterate(self);
    while (Hash_Next(self, &key, &val)) {
        VA_Push(keys, INCREF(key));
    }
    return keys;
}
// Inflate a dumped Hash.  If the dump carries a "_class" key naming a known
// (or derivable) class, dispatch to that class's Load(); otherwise treat the
// dump as an ordinary Hash and Load() each value recursively.
Obj*
Hash_load(Hash *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    CharBuf *class_name = (CharBuf*)Hash_Fetch_Str(source, "_class", 6);
    UNUSED_VAR(self);

    // Assume that the presence of the "_class" key paired with a valid class
    // name indicates the output of a Dump rather than an ordinary Hash.
    if (class_name && CB_Is_A(class_name, CHARBUF)) {
        VTable *vtable = VTable_fetch_vtable(class_name);

        if (!vtable) {
            // Class not registered yet; try to synthesize it from its
            // nearest known parent class.
            CharBuf *parent_class = VTable_find_parent_class(class_name);
            if (parent_class) {
                VTable *parent = VTable_singleton(parent_class, NULL);
                vtable = VTable_singleton(class_name, parent);
                DECREF(parent_class);
            }
            else {
                // TODO: Fix Hash_Load() so that it works with ordinary hash
                // keys named "_class".
                THROW(ERR, "Can't find class '%o'", class_name);
            }
        }

        // Dispatch to an alternate Load() method.
        if (vtable) {
            Obj_Load_t load = METHOD_PTR(vtable, Lucy_Obj_Load);
            if (load == Obj_load) {
                THROW(ERR, "Abstract method Load() not defined for %o",
                      VTable_Get_Name(vtable));
            }
            else if (load != (Obj_Load_t)Hash_load) { // stop inf loop
                return VTable_Load_Obj(vtable, dump);
            }
        }
    }

    // It's an ordinary Hash.
    Hash *loaded = Hash_new(source->size);
    Obj *key;
    Obj *value;
    Hash_Iterate(source);
    while (Hash_Next(source, &key, &value)) {
        Hash_Store(loaded, key, Obj_Load(value, value));
    }

    return (Obj*)loaded;
}
// Produce a dump of the Hash suitable for JSON serialization.  Caller takes
// ownership of the returned Hash.
Hash*
Hash_dump(Hash *self) {
    Hash *dump = Hash_new(self->size);
    Obj *key;
    Obj *value;

    Hash_Iterate(self);
    while (Hash_Next(self, &key, &value)) {
        // Since JSON only supports text hash keys, Dump() can only support
        // text hash keys.
        CERTIFY(key, CHARBUF);
        Hash_Store(dump, key, Obj_Dump(value));
    }

    return dump;
}
// When merging away a segment, make sure any deletions files it carried
// against still-live target segments get rewritten, so those deletions
// aren't lost with the merged segment.
void
DefDelWriter_Merge_Segment_IMP(DefaultDeletionsWriter *self,
                               SegReader *reader, I32Array *doc_map) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    UNUSED_VAR(doc_map);
    Segment *segment = SegReader_Get_Segment(reader);
    Hash *del_meta = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "deletions", 9);

    if (del_meta) {
        VArray *seg_readers = ivars->seg_readers;
        Hash *files = (Hash*)Hash_Fetch_Utf8(del_meta, "files", 5);
        if (files) {
            // `files` maps target segment name => metadata for the
            // deletions file this segment holds against that target.
            String *seg;
            Hash *mini_meta;
            Hash_Iterate(files);
            while (Hash_Next(files, (Obj**)&seg, (Obj**)&mini_meta)) {

                /* Find the segment the deletions from the SegReader
                 * we're adding correspond to.  If it's gone, we don't
                 * need to worry about losing deletions files that point
                 * at it. */
                for (uint32_t i = 0, max = VA_Get_Size(seg_readers);
                     i < max; i++
                    ) {
                    SegReader *candidate
                        = (SegReader*)VA_Fetch(seg_readers, i);
                    String *candidate_name
                        = Seg_Get_Name(SegReader_Get_Segment(candidate));
                    if (Str_Equals(seg, (Obj*)candidate_name)) {
                        /* If the count hasn't changed, we're about to
                         * merge away the most recent deletions file
                         * pointing at this target segment -- so force a
                         * new file to be written out. */
                        int32_t count = (int32_t)Obj_To_I64(
                                            Hash_Fetch_Utf8(mini_meta,
                                                            "count", 5));
                        DeletionsReader *del_reader
                            = (DeletionsReader*)SegReader_Obtain(
                                  candidate,
                                  Class_Get_Name(DELETIONSREADER));
                        if (count == DelReader_Del_Count(del_reader)) {
                            // Flag this target segment's deletions as
                            // dirty; a fresh file will be written.
                            ivars->updated[i] = true;
                        }
                        // Target found; stop scanning candidates.
                        break;
                    }
                }
            }
        }
    }
}
// Feed every field of `doc` into the Inverter, coercing each field value
// into the entry's typed holder according to the field's primitive type.
void
Inverter_invert_doc(Inverter *self, Doc *doc) {
    InverterIVARS *const ivars = Inverter_IVARS(self);
    Hash *const fields = (Hash*)Doc_Get_Fields(doc);
    // Hash_Iterate returns the entry count and resets the cursor; the loop
    // below calls Hash_Next exactly that many times.
    uint32_t num_keys = Hash_Iterate(fields);

    // Prepare for the new doc.
    Inverter_Set_Doc(self, doc);

    // Extract and invert the doc's fields.
    while (num_keys--) {
        Obj *key, *obj;
        Hash_Next(fields, &key, &obj);
        // Field names must be text.
        CharBuf *field = (CharBuf*)CERTIFY(key, CHARBUF);
        InverterEntry *inventry = S_fetch_entry(ivars, field);
        InverterEntryIVARS *inventry_ivars = InvEntry_IVARS(inventry);
        FieldType *type = inventry_ivars->type;

        // Get the field value.  Each branch CERTIFYs or coerces the raw
        // object into the entry's preallocated typed value holder.
        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                    CharBuf *char_buf = (CharBuf*)CERTIFY(obj, CHARBUF);
                    ViewCharBuf *value = (ViewCharBuf*)inventry_ivars->value;
                    ViewCB_Assign(value, char_buf);
                    break;
                }
            case FType_BLOB: {
                    ByteBuf *byte_buf = (ByteBuf*)CERTIFY(obj, BYTEBUF);
                    ViewByteBuf *value = (ViewByteBuf*)inventry_ivars->value;
                    ViewBB_Assign(value, byte_buf);
                    break;
                }
            case FType_INT32: {
                    int32_t int_val = (int32_t)Obj_To_I64(obj);
                    Integer32* value = (Integer32*)inventry_ivars->value;
                    Int32_Set_Value(value, int_val);
                    break;
                }
            case FType_INT64: {
                    int64_t int_val = Obj_To_I64(obj);
                    Integer64* value = (Integer64*)inventry_ivars->value;
                    Int64_Set_Value(value, int_val);
                    break;
                }
            case FType_FLOAT32: {
                    float float_val = (float)Obj_To_F64(obj);
                    Float32* value = (Float32*)inventry_ivars->value;
                    Float32_Set_Value(value, float_val);
                    break;
                }
            case FType_FLOAT64: {
                    double float_val = Obj_To_F64(obj);
                    Float64* value = (Float64*)inventry_ivars->value;
                    Float64_Set_Value(value, float_val);
                    break;
                }
            default:
                THROW(ERR, "Unrecognized type: %o", type);
        }

        Inverter_Add_Field(self, inventry);
    }
}