/* Initialize a LexiconWriter.  Chains up to DataWriter_init, derives the
 * index and skip intervals from the Schema's Architecture, and starts all
 * output streams as NULL (presumably opened later when writing begins --
 * confirm against the finish/flush code).
 * NOTE(review): self->stepper is set to NULL in the Init section and then
 * immediately assigned in the Derive section -- harmless redundancy. */
LexiconWriter*
LexWriter_init(LexiconWriter *self, Snapshot *snapshot, Segment *segment,
               PolyReader *polyreader) {
    Schema *schema = PolyReader_Get_Schema(polyreader);
    Architecture *arch = Schema_Get_Architecture(schema);
    DataWriter_init((DataWriter*)self, snapshot, segment, polyreader);

    /* Assign. */
    self->index_interval = Arch_Index_Interval(arch);
    self->skip_interval = Arch_Skip_Interval(arch);

    /* Init. */
    self->ix_out = NULL;
    self->ixix_out = NULL;
    self->dat_out = NULL;
    self->count = 0;
    self->ix_count = 0;
    self->last_tinfo = TInfo_new(0,0,0,0);
    self->last_text = CB_new(40);
    self->dat_file = CB_new(30);
    self->ix_file = CB_new(30);
    self->ixix_file = CB_new(30);
    self->counts = Hash_new(0);
    self->ix_counts = Hash_new(0);
    self->stepper = NULL;
    self->temp_mode = false;

    /* Derive. */
    self->stepper = LexStepper_new((CharBuf*)&EMPTY, self->skip_interval);

    return self;
}
// Initialize a DocVector with empty per-field maps; entries are expected
// to be populated after construction.
DocVector*
DocVec_init(DocVector *self) {
    DocVectorIVARS *const my_ivars = DocVec_IVARS(self);
    my_ivars->field_vectors = Hash_new(0);
    my_ivars->field_bufs    = Hash_new(0);
    return self;
}
/* Extend the parent class's metadata Hash with a "files" entry describing
 * the deletions written this session.  Only segments whose deletions were
 * actually updated (ivars->updated[i]) get an entry; each entry records
 * the deleted-doc count and the deletions filename, keyed by segment name. */
Hash*
DefDelWriter_Metadata_IMP(DefaultDeletionsWriter *self) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    // Resolve the superclass implementation explicitly, then build on its
    // result.
    DefDelWriter_Metadata_t super_meta
        = (DefDelWriter_Metadata_t)SUPER_METHOD_PTR(DEFAULTDELETIONSWRITER,
                                                    LUCY_DefDelWriter_Metadata);
    Hash *const metadata = super_meta(self);
    Hash *const files = Hash_new(0);

    for (uint32_t i = 0, max = VA_Get_Size(ivars->seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(ivars->seg_readers, i);
        if (ivars->updated[i]) {
            BitVector *deldocs = (BitVector*)VA_Fetch(ivars->bit_vecs, i);
            Segment *segment = SegReader_Get_Segment(seg_reader);
            Hash *mini_meta = Hash_new(2);
            Hash_Store_Utf8(mini_meta, "count", 5,
                            (Obj*)Str_newf("%u32",
                                           (uint32_t)BitVec_Count(deldocs)));
            Hash_Store_Utf8(mini_meta, "filename", 8,
                            (Obj*)S_del_filename(self, seg_reader));
            Hash_Store(files, (Obj*)Seg_Get_Name(segment), (Obj*)mini_meta);
        }
    }
    Hash_Store_Utf8(metadata, "files", 5, (Obj*)files);

    return metadata;
}
/* Load a Hash from a binary file of (key, value) int pairs.
 *
 * Returns NULL when the file cannot be opened, the populated Hash on
 * success, and a fresh empty Hash when a read error or a truncated pair
 * is encountered midway (matching the original error path).
 *
 * Fix: the original called Hash_add() once per *single* int read inside
 * the i-loop, so the very first insertion used an uninitialized d[1] and
 * every complete pair was inserted twice.  A pair is now added only after
 * both ints have been read successfully.
 *
 * NOTE(review): fd is never closed on any path -- add the matching
 * File_close() call here if the File API provides one.
 */
Hash_T Hash_load(const char *file) {
    int fd;
    Hash_T h = NULL;

    if ((fd = File_openRead(file)) != -1) {
        h = Hash_new();
        for (;;) {
            int d[2];
            int nread = File_read(fd, &d[0], sizeof(int));
            if (nread == 0) break;      /* clean EOF at a pair boundary */
            if (nread == -1) goto err;
            nread = File_read(fd, &d[1], sizeof(int));
            if (nread == -1 || nread == 0) goto err; /* error or truncated pair */
            Hash_add(h, d[0], d[1]);
        }
    }
    return h;

err:
    Hash_free(&h);
    return Hash_new();
}
// Initialize a LexiconWriter.  Chains up to DataWriter_init, derives the
// index and skip intervals from the Schema's Architecture, and leaves all
// output streams NULL (presumably opened once term writing starts --
// confirm).  term_stepper also stays NULL here; tinfo_stepper is built
// immediately since it depends only on the schema.
LexiconWriter*
LexWriter_init(LexiconWriter *self, Schema *schema, Snapshot *snapshot,
               Segment *segment, PolyReader *polyreader) {
    Architecture *arch = Schema_Get_Architecture(schema);
    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);

    // Assign.
    self->index_interval = Arch_Index_Interval(arch);
    self->skip_interval = Arch_Skip_Interval(arch);

    // Init.
    self->ix_out = NULL;
    self->ixix_out = NULL;
    self->dat_out = NULL;
    self->count = 0;
    self->ix_count = 0;
    self->dat_file = CB_new(30);
    self->ix_file = CB_new(30);
    self->ixix_file = CB_new(30);
    self->counts = Hash_new(0);
    self->ix_counts = Hash_new(0);
    self->temp_mode = false;
    self->term_stepper = NULL;
    self->tinfo_stepper = (TermStepper*)MatchTInfoStepper_new(schema);

    return self;
}
// Hash_new must hand back a non-NULL table for each small starting
// capacity (0 through 2), including the degenerate zero-capacity case.
static void
testNew() {
    for (int capacity = 0; capacity <= 2; capacity++) {
        Hash_T h = Hash_new(capacity);
        EXPECT_NOT_NULL(h);
    }
}
// Hash_free must release each table and NULL out the caller's handle,
// regardless of the starting capacity (0 through 2).
static void
testFree() {
    for (int capacity = 0; capacity <= 2; capacity++) {
        Hash_T h = Hash_new(capacity);
        Hash_free(&h);
        EXPECT_NULL(h);
    }
}
/* Serialize a FullTextType to the abbreviated schema-file format: the
 * "type" tag is always written, but each attribute is recorded only when
 * it differs from the default (boost 1.0, indexed, stored, not sortable,
 * not highlightable), keeping the dump minimal. */
Hash*
FullTextType_Dump_For_Schema_IMP(FullTextType *self) {
    FullTextTypeIVARS *const ivars = FullTextType_IVARS(self);
    Hash *dump = Hash_new(0);
    Hash_Store_Utf8(dump, "type", 4, (Obj*)Str_newf("fulltext"));

    // Store attributes that override the defaults.
    if (ivars->boost != 1.0) {
        Hash_Store_Utf8(dump, "boost", 5,
                        (Obj*)Str_newf("%f64", ivars->boost));
    }
    if (!ivars->indexed) {
        Hash_Store_Utf8(dump, "indexed", 7, (Obj*)CFISH_FALSE);
    }
    if (!ivars->stored) {
        Hash_Store_Utf8(dump, "stored", 6, (Obj*)CFISH_FALSE);
    }
    if (ivars->sortable) {
        Hash_Store_Utf8(dump, "sortable", 8, (Obj*)CFISH_TRUE);
    }
    if (ivars->highlightable) {
        Hash_Store_Utf8(dump, "highlightable", 13, (Obj*)CFISH_TRUE);
    }

    return dump;
}
/* Exercise Hash_insert with int and string keys, checking both the
 * element count and the internal table size after each insertion. */
static void
testInsert() {
    // test general case
    Hash_T h0 = Hash_new(0);
    EXPECT_EQ_UINT32(0, h0->nElements);
    EXPECT_EQ_UINT32(0, h0->tableSize);

    // First insertion: table grows 0 -> 1.
    int key1 = 123;
    char* value1 = "1";
    h0 = Hash_insert(h0, Atom_newFromInt64(key1), value1);
    EXPECT_NOT_NULL(h0);
    EXPECT_EQ_UINT32(1, h0->nElements);
    EXPECT_EQ_UINT32(1, h0->tableSize);

    // Second insertion: table grows 1 -> 3.
    // NOTE(review): growth sequence 0, 1, 3 suggests the capacity roughly
    // doubles to an odd size -- confirm against Hash_insert's resize policy.
    int key2 = -27;
    char value2[] = "value2";
    h0 = Hash_insert(h0, Atom_newFromInt64(key2), value2);
    EXPECT_NOT_NULL(h0);
    EXPECT_EQ_UINT32(2, h0->nElements);
    EXPECT_EQ_UINT32(3, h0->tableSize);

    // Third insertion fits without a rebuild (3 elements in size 3).
    // The Str_T only backs the stored char*; freed once stored.
    char * key3 = "abc";
    int16_t value3 = 1056;
    Str_T s = Str_newFromInt16(value3);
    h0 = Hash_insert(h0, Atom_newFromString(key3), Str_str(s));
    EXPECT_NOT_NULL(h0);
    EXPECT_EQ_UINT32(3, h0->nElements);
    EXPECT_EQ_UINT32(3, h0->tableSize);
    Str_free(&s);

    Hash_free(&h0);
    EXPECT_NULL(h0);
}
// Create a test data structure including at least one each of Hash, Vector, // and String. static Obj* S_make_dump() { Hash *dump = Hash_new(0); Hash_Store_Utf8(dump, "foo", 3, (Obj*)Str_newf("foo")); Hash_Store_Utf8(dump, "stuff", 5, (Obj*)Vec_new(0)); return (Obj*)dump; }
/* Round-trip a Hash through Dump and Load and verify the reconstructed
 * copy compares equal to the original. */
static void
test_Dump_and_Load(TestBatch *batch) {
    Hash *hash = Hash_new(0);
    Obj *dump;
    Hash *loaded;

    Hash_Store_Str(hash, "foo", 3,
                   (Obj*)CB_new_from_trusted_utf8("foo", 3));
    dump = (Obj*)Hash_Dump(hash);
    loaded = (Hash*)Obj_Load(dump, dump);
    TEST_TRUE(batch, Hash_Equals(hash, (Obj*)loaded),
              "Dump => Load round trip");
    DECREF(dump);
    DECREF(loaded);

    /* Disabled until the "_class" handling in Hash_Load is repaired.

    TODO: Fix Hash_Load().

    Hash_Store_Str(hash, "_class", 6,
                   (Obj*)CB_new_from_trusted_utf8("not_a_class", 11));
    dump = (Obj*)Hash_Dump(hash);
    loaded = (Hash*)Obj_Load(dump, dump);
    TEST_TRUE(batch, Hash_Equals(hash, (Obj*)loaded),
              "Load still works with _class if it's not a real class");
    DECREF(dump);
    DECREF(loaded);

    */

    DECREF(hash);
}
// Base-class metadata: records only the serialization format number.
// Subclasses presumably extend this Hash with their own entries.
Hash*
DataWriter_Metadata_IMP(DataWriter *self) {
    Hash *meta = Hash_new(0);
    Hash_Store_Utf8(meta, "format", 6,
                    (Obj*)Str_newf("%i32", DataWriter_Format(self)));
    return meta;
}
// Create a test data structure including at least one each of Hash, VArray, // and CharBuf. static Obj* S_make_dump() { Hash *dump = Hash_new(0); Hash_Store_Str(dump, "foo", 3, (Obj*)CB_newf("foo")); Hash_Store_Str(dump, "stuff", 5, (Obj*)VA_new(0)); return (Obj*)dump; }
/* Build a stoplist Hash for the given two-letter ISO language code, or
 * return NULL when no compiled-in Snowball stopword list matches.  Each
 * stopword maps to CFISH_TRUE, so the Hash acts as a set. */
Hash*
SnowStop_gen_stoplist(String *language) {
    // Table mapping language codes to their NULL-terminated word arrays.
    static const struct {
        const char     *code;
        const uint8_t **words;
    } lists[] = {
        { "da", SnowStop_snow_da }, { "de", SnowStop_snow_de },
        { "en", SnowStop_snow_en }, { "es", SnowStop_snow_es },
        { "fi", SnowStop_snow_fi }, { "fr", SnowStop_snow_fr },
        { "hu", SnowStop_snow_hu }, { "it", SnowStop_snow_it },
        { "nl", SnowStop_snow_nl }, { "no", SnowStop_snow_no },
        { "pt", SnowStop_snow_pt }, { "ru", SnowStop_snow_ru },
        { "sv", SnowStop_snow_sv },
    };

    char lang[2];
    lang[0] = tolower(Str_Code_Point_At(language, 0));
    lang[1] = tolower(Str_Code_Point_At(language, 1));

    const uint8_t **words = NULL;
    for (size_t j = 0; j < sizeof(lists) / sizeof(lists[0]); j++) {
        if (memcmp(lang, lists[j].code, 2) == 0) {
            words = lists[j].words;
            break;
        }
    }
    if (words == NULL) {
        return NULL;
    }

    // Count the entries first so the Hash can be sized exactly.
    size_t num_stopwords = 0;
    while (words[num_stopwords] != NULL) {
        num_stopwords++;
    }

    Hash *stoplist = Hash_new(num_stopwords);
    for (size_t j = 0; words[j] != NULL; j++) {
        char *word = (char*)words[j];
        String *stop = Str_new_wrap_trusted_utf8(word, strlen(word));
        Hash_Store(stoplist, stop, (Obj*)CFISH_TRUE);
        DECREF(stop);
    }
    return stoplist;
}
// Initialize a Schema: empty registries for analyzers, field types, and
// similarities, plus an Architecture-derived default Similarity.
Schema*
Schema_init(Schema *self) {
    SchemaIVARS *const ivars = Schema_IVARS(self);

    // Init.
    ivars->analyzers = Hash_new(0);
    ivars->types = Hash_new(0);
    ivars->sims = Hash_new(0);
    ivars->uniq_analyzers = Vec_new(2);
    // NOTE(review): resizing to 1 appears to reserve slot 0 of
    // uniq_analyzers (ticks referencing it would then start at 1) --
    // confirm against Schema_Dump's tick lookup.
    Vec_Resize(ivars->uniq_analyzers, 1);

    // Assign.
    ivars->arch = Schema_Architecture(self);
    ivars->sim = Arch_Make_Similarity(ivars->arch);

    return self;
}
/* Serialize the Schema: class name, the deduplicated analyzer list, and a
 * per-field dump of every FieldType.  Known types (full text, string,
 * blob) use their compact schema format; anything else falls back to the
 * generic FType_Dump. */
Hash*
Schema_Dump_IMP(Schema *self) {
    SchemaIVARS *const ivars = Schema_IVARS(self);
    Hash *dump = Hash_new(0);
    Hash *type_dumps = Hash_new(Hash_Get_Size(ivars->types));

    // Record class name, store dumps of unique Analyzers.
    Hash_Store_Utf8(dump, "_class", 6,
                    (Obj*)Str_Clone(Schema_get_class_name(self)));
    Hash_Store_Utf8(dump, "analyzers", 9,
                    Freezer_dump((Obj*)ivars->uniq_analyzers));

    // Dump FieldTypes.
    Hash_Store_Utf8(dump, "fields", 6, (Obj*)type_dumps);
    HashIterator *iter = HashIter_new(ivars->types);
    while (HashIter_Next(iter)) {
        String *field = HashIter_Get_Key(iter);
        FieldType *type = (FieldType*)HashIter_Get_Value(iter);
        Class *type_class = FType_get_class(type);

        // Dump known types to simplified format.
        if (type_class == FULLTEXTTYPE) {
            FullTextType *fttype = (FullTextType*)type;
            Hash *type_dump = FullTextType_Dump_For_Schema(fttype);
            Analyzer *analyzer = FullTextType_Get_Analyzer(fttype);
            uint32_t tick
                = S_find_in_array(ivars->uniq_analyzers, (Obj*)analyzer);
            // Store the tick which references a unique analyzer.
            Hash_Store_Utf8(type_dump, "analyzer", 8,
                            (Obj*)Str_newf("%u32", tick));
            Hash_Store(type_dumps, field, (Obj*)type_dump);
        }
        else if (type_class == STRINGTYPE || type_class == BLOBTYPE) {
            Hash *type_dump = FType_Dump_For_Schema(type);
            Hash_Store(type_dumps, field, (Obj*)type_dump);
        }
        // Unknown FieldType type, so punt.
        else {
            Hash_Store(type_dumps, field, FType_Dump(type));
        }
    }
    DECREF(iter);

    return dump;
}
// Reset the Snapshot to a pristine state: a fresh empty entry set and no
// associated path.
static void
S_zero_out(Snapshot *self) {
    DECREF(self->entries);
    self->entries = Hash_new(0);
    DECREF(self->path);
    self->path = NULL;
}
RAMFolder*
RAMFolder_init(RAMFolder *self, const CharBuf *path) {
    Folder_init((Folder*)self, path);
    self->elems = Hash_new(16);
    // A non-empty path means this RAM folder mirrors something on disk.
    // NOTE(review): S_read_fsfolder presumably slurps the filesystem
    // contents into memory -- confirm against its definition.
    if (CB_Get_Size(self->path) != 0) {
        S_read_fsfolder(self);
    }
    return self;
}
// Reset the Snapshot to a pristine state: a fresh empty entry set and no
// associated path.
static void
S_zero_out(Snapshot *self) {
    SnapshotIVARS *const ivars = Snapshot_IVARS(self);
    DECREF(ivars->entries);
    ivars->entries = Hash_new(0);
    DECREF(ivars->path);
    ivars->path = NULL;
}
/* Reset the Snapshot to a pristine state: a fresh empty entry set and no
 * snapshot filename. */
static void
S_zero_out(Snapshot *self) {
    DECREF(self->entries);
    self->entries = Hash_new(0);
    DECREF(self->filename);
    self->filename = NULL;
}
/* Construct an empty HashSet backed by a Hash table.
 * hashCode: hash function applied to set elements.
 * equals:   equality predicate used to compare elements.
 * The Hash's third argument is 0 because the set never uses the "dup"
 * facility (see comment below).
 * NOTE(review): Mem_NEW presumably aborts on OOM rather than returning
 * NULL -- confirm; the result is otherwise unchecked here. */
T HashSet_new (int (*hashCode)(Poly_t)
              , Poly_tyEquals equals) {
    T set;

    Mem_NEW(set);
    // we don't use "dup"
    set->hash = Hash_new (hashCode, equals, 0);
    return set;
}
// Serialize the base Query state: the concrete class name plus the boost.
// Subclasses presumably chain up and add their own entries.
Obj*
Query_Dump_IMP(Query *self) {
    QueryIVARS *ivars = Query_IVARS(self);
    Hash *result = Hash_new(0);
    Hash_Store_Utf8(result, "_class", 6,
                    (Obj*)Str_Clone(Obj_Get_Class_Name((Obj*)self)));
    Hash_Store_Utf8(result, "boost", 5,
                    (Obj*)Str_newf("%f64", (double)ivars->boost));
    return (Obj*)result;
}
/* Exercise Hash_Keys, Hash_Values, the Iterate/Next API, and Find_Key.
 * 500 entries with key == value means one sorted array serves as the
 * expected result for both keys and values, and forces several internal
 * rebuilds along the way.
 *
 * Fix: a stray comma operator used to fuse the second Hash_Find_Key
 * assignment with the following TEST_TRUE into a single expression
 * statement; replaced with a proper semicolon (behavior unchanged). */
static void
test_Keys_Values_Iter(TestBatch *batch) {
    Hash *hash = Hash_new(0); // trigger multiple rebuilds.
    VArray *expected = VA_new(100);
    VArray *keys;
    VArray *values;

    for (uint32_t i = 0; i < 500; i++) {
        CharBuf *cb = CB_newf("%u32", i);
        Hash_Store(hash, (Obj*)cb, (Obj*)cb);
        VA_Push(expected, INCREF(cb));
    }
    VA_Sort(expected, NULL, NULL);

    // Keys() and Values() return unordered snapshots; sort to compare.
    keys = Hash_Keys(hash);
    values = Hash_Values(hash);
    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "Keys");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "Values");
    VA_Clear(keys);
    VA_Clear(values);

    // Re-collect the same pairs via the iteration API.
    {
        Obj *key;
        Obj *value;
        Hash_Iterate(hash);
        while (Hash_Next(hash, &key, &value)) {
            VA_Push(keys, INCREF(key));
            VA_Push(values, INCREF(value));
        }
    }
    VA_Sort(keys, NULL, NULL);
    VA_Sort(values, NULL, NULL);
    TEST_TRUE(batch, VA_Equals(keys, (Obj*)expected), "Keys from Iter");
    TEST_TRUE(batch, VA_Equals(values, (Obj*)expected), "Values from Iter");

    {
        ZombieCharBuf *forty = ZCB_WRAP_STR("40", 2);
        ZombieCharBuf *nope = ZCB_WRAP_STR("nope", 4);
        Obj *key = Hash_Find_Key(hash, (Obj*)forty, ZCB_Hash_Sum(forty));
        TEST_TRUE(batch, Obj_Equals(key, (Obj*)forty), "Find_Key");
        key = Hash_Find_Key(hash, (Obj*)nope, ZCB_Hash_Sum(nope));
        TEST_TRUE(batch, key == NULL,
                  "Find_Key returns NULL for non-existent key");
    }

    DECREF(hash);
    DECREF(expected);
    DECREF(keys);
    DECREF(values);
}
// Initialize a SortWriter.  Temp streams start NULL; per-field writers
// are created lazily into field_writers.
SortWriter*
SortWriter_init(SortWriter *self, Schema *schema, Snapshot *snapshot,
                Segment *segment, PolyReader *polyreader) {
    // +1 slot, presumably because field numbers are 1-based (slot 0
    // unused) -- confirm against Segment's field-number assignment.
    uint32_t field_max = Schema_Num_Fields(schema) + 1;
    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);

    // Init.
    ivars->field_writers = VA_new(field_max);
    ivars->counts = Hash_new(0);
    ivars->null_ords = Hash_new(0);
    ivars->ord_widths = Hash_new(0);
    ivars->temp_ord_out = NULL;
    ivars->temp_ix_out = NULL;
    ivars->temp_dat_out = NULL;
    ivars->mem_pool = MemPool_new(0);
    ivars->mem_thresh = default_mem_thresh;
    ivars->flush_at_finish = false;

    return self;
}
// Initialize a SegWriter: chain up, prepare the component registries,
// then let the Architecture install its default set of sub-writers.
SegWriter*
SegWriter_init(SegWriter *self, Schema *schema, Snapshot *snapshot,
               Segment *segment, PolyReader *polyreader) {
    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);
    SegWriterIVARS *const ivars = SegWriter_IVARS(self);
    ivars->by_api = Hash_new(0);
    ivars->inverter = Inverter_new(schema, segment);
    ivars->writers = Vec_new(16);
    Arch_Init_Seg_Writer(Schema_Get_Architecture(schema), self);
    return self;
}
/* Install a fresh set of segment readers on the PolyReader: records doc
 * offsets, totals doc_max, groups each segment's component DataReaders by
 * API name, and replaces each group with a single aggregate reader. */
static void
S_init_sub_readers(PolyReader *self, VArray *sub_readers) {
    PolyReaderIVARS *const ivars = PolyReader_IVARS(self);
    uint32_t num_sub_readers = VA_Get_Size(sub_readers);
    int32_t *starts = (int32_t*)MALLOCATE(num_sub_readers * sizeof(int32_t));
    Hash *data_readers = Hash_new(0);

    DECREF(ivars->sub_readers);
    DECREF(ivars->offsets);
    ivars->sub_readers = (VArray*)INCREF(sub_readers);

    // Accumulate doc_max, subreader start offsets, and DataReaders.
    ivars->doc_max = 0;
    for (uint32_t i = 0; i < num_sub_readers; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(sub_readers, i);
        Hash *components = SegReader_Get_Components(seg_reader);
        CharBuf *api;
        DataReader *component;
        starts[i] = ivars->doc_max;
        ivars->doc_max += SegReader_Doc_Max(seg_reader);
        // Group components by API name; VA_Store at index i keeps slot
        // order aligned with sub_readers even if a segment lacks one.
        Hash_Iterate(components);
        while (Hash_Next(components, (Obj**)&api, (Obj**)&component)) {
            VArray *readers = (VArray*)Hash_Fetch(data_readers, (Obj*)api);
            if (!readers) {
                readers = VA_new(num_sub_readers);
                Hash_Store(data_readers, (Obj*)api, (Obj*)readers);
            }
            VA_Store(readers, i, INCREF(component));
        }
    }
    // I32Arr_new_steal takes ownership of the malloc'd starts array.
    ivars->offsets = I32Arr_new_steal(starts, num_sub_readers);

    // For each API, build one aggregate reader spanning all segments.
    CharBuf *api;
    VArray *readers;
    Hash_Iterate(data_readers);
    while (Hash_Next(data_readers, (Obj**)&api, (Obj**)&readers)) {
        DataReader *datareader
            = (DataReader*)CERTIFY(S_first_non_null(readers), DATAREADER);
        DataReader *aggregator
            = DataReader_Aggregator(datareader, readers, ivars->offsets);
        if (aggregator) {
            CERTIFY(aggregator, DATAREADER);
            Hash_Store(ivars->components, (Obj*)api, (Obj*)aggregator);
        }
    }
    DECREF(data_readers);

    // Cache the total deletion count if a DeletionsReader was installed.
    DeletionsReader *del_reader
        = (DeletionsReader*)Hash_Fetch(ivars->components,
                                       (Obj*)VTable_Get_Name(DELETIONSREADER));
    ivars->del_count = del_reader ? DelReader_Del_Count(del_reader) : 0;
}
/* A Hash that contains itself recurses forever; Json_to_json must detect
 * the excessive depth and fail cleanly instead of overflowing the stack. */
static void
test_max_depth(TestBatch *batch) {
    Hash *circular = Hash_new(0);
    Hash_Store_Str(circular, "circular", 8, INCREF(circular));
    Err_set_error(NULL);
    CharBuf *not_json = Json_to_json((Obj*)circular);
    TEST_TRUE(batch, not_json == NULL,
              "to_json returns NULL when fed recursing data");
    TEST_TRUE(batch, Err_get_error() != NULL,
              "to_json sets Err_error when fed recursing data");
    // Break the self-reference before the final DECREF so the Hash's
    // refcount can actually reach zero.
    DECREF(Hash_Delete_Str(circular, "circular", 8));
    DECREF(circular);
}
// Persist the merge cutoff to "merge.json" inside the managed folder,
// throwing if the write fails.
void
IxManager_Write_Merge_Data_IMP(IndexManager *self, int64_t cutoff) {
    IndexManagerIVARS *const ivars = IxManager_IVARS(self);
    String *merge_json = SSTR_WRAP_C("merge.json");
    Hash *data = Hash_new(1);
    Hash_Store_Utf8(data, "cutoff", 6, (Obj*)Str_newf("%i64", cutoff));
    bool success = Json_spew_json((Obj*)data, ivars->folder, merge_json);
    DECREF(data);
    if (!success) {
        THROW(ERR, "Failed to write to %o", merge_json);
    }
}
/* A Hash that contains itself recurses forever; Json_to_json must detect
 * the excessive depth and fail cleanly instead of overflowing the stack. */
static void
test_max_depth(TestBatchRunner *runner) {
    Hash *circular = Hash_new(0);
    Hash_Store_Utf8(circular, "circular", 8, INCREF(circular));
    Err_set_error(NULL);
    String *not_json = Json_to_json((Obj*)circular);
    TEST_TRUE(runner, not_json == NULL,
              "to_json returns NULL when fed recursing data");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "to_json sets global error when fed recursing data");
    // Break the self-reference before the final DECREF so the Hash's
    // refcount can actually reach zero.
    DECREF(Hash_Delete_Utf8(circular, "circular", 8));
    DECREF(circular);
}
// Initialize a Segment with the given (non-negative) segment number.
// Throws if number is negative; derives the segment name from the number.
Segment*
Seg_init(Segment *self, int64_t number) {
    // Validate.
    if (number < 0) {
        THROW(ERR, "Segment number %i64 less than 0", number);
    }

    // Init.
    self->metadata = Hash_new(0);
    self->count = 0;
    self->by_num = VA_new(2);
    self->by_name = Hash_new(0);

    // Start field numbers at 1, not 0.
    VA_Push(self->by_num, (Obj*)CB_newf(""));

    // Assign.
    self->number = number;

    // Derive.
    self->name = Seg_num_to_name(number);

    return self;
}