Obj*
TermQuery_Dump_IMP(TermQuery *self) {
    TermQueryIVARS *ivars = TermQuery_IVARS(self);
    TermQuery_Dump_t super_dump
        = SUPER_METHOD_PTR(TERMQUERY, LUCY_TermQuery_Dump);
    Hash *dump = (Hash*)CERTIFY(super_dump(self), HASH);
    Hash_Store_Utf8(dump, "field", 5, Freezer_dump((Obj*)ivars->field));
    Hash_Store_Utf8(dump, "term", 4, Freezer_dump(ivars->term));
    return (Obj*)dump;
}
PolyHighlightReader*
PolyHLReader_init(PolyHighlightReader *self, VArray *readers,
                  I32Array *offsets) {
    HLReader_init((HighlightReader*)self, NULL, NULL, NULL, NULL, -1);
    for (uint32_t i = 0, max = VA_Get_Size(readers); i < max; i++) {
        CERTIFY(VA_Fetch(readers, i), HIGHLIGHTREADER);
    }
    self->readers = (VArray*)INCREF(readers);
    self->offsets = (I32Array*)INCREF(offsets);
    return self;
}
static PhraseQuery*
S_do_init(PhraseQuery *self, String *field, Vector *terms, float boost) {
    Query_init((Query*)self, boost);
    PhraseQueryIVARS *const ivars = PhraseQuery_IVARS(self);
    for (uint32_t i = 0, max = Vec_Get_Size(terms); i < max; i++) {
        CERTIFY(Vec_Fetch(terms, i), OBJ);
    }
    ivars->field = field;
    ivars->terms = terms;
    return self;
}
PolyDocReader*
PolyDocReader_init(PolyDocReader *self, Vector *readers, I32Array *offsets) {
    DocReader_init((DocReader*)self, NULL, NULL, NULL, NULL, -1);
    PolyDocReaderIVARS *const ivars = PolyDocReader_IVARS(self);
    for (uint32_t i = 0, max = Vec_Get_Size(readers); i < max; i++) {
        CERTIFY(Vec_Fetch(readers, i), DOCREADER);
    }
    ivars->readers = (Vector*)INCREF(readers);
    ivars->offsets = (I32Array*)INCREF(offsets);
    return self;
}
void
BBSortEx_feed(BBSortEx *self, void *data) {
    SortEx_feed((SortExternal*)self, data);

    // Flush() if necessary.
    ByteBuf *bytebuf = (ByteBuf*)CERTIFY(*(ByteBuf**)data, BYTEBUF);
    self->mem_consumed += BB_Get_Size(bytebuf);
    if (self->mem_consumed >= self->mem_thresh) {
        BBSortEx_Flush(self);
    }
}
int32_t
StrIter_Compare_To_IMP(StringIterator *self, Obj *other) {
    StringIterator *twin = (StringIterator*)CERTIFY(other, STRINGITERATOR);
    if (self->string != twin->string) {
        THROW(ERR, "Can't compare iterators of different strings");
        UNREACHABLE_RETURN(int32_t);
    }
    if (self->byte_offset < twin->byte_offset) { return -1; }
    if (self->byte_offset > twin->byte_offset) { return 1; }
    return 0;
}
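/* A standalone sketch of the same three-way comparison contract (negative,
 * zero, positive), written against a hypothetical `Cursor` struct rather
 * than Lucy's StringIterator.  As above, comparing cursors over different
 * containers is an error, and equal offsets compare equal. */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
    const void *container;  // Identity of the backing container.
    size_t      offset;     // Position within it.
} Cursor;

static int
Cursor_compare(const Cursor *a, const Cursor *b) {
    if (a->container != b->container) {
        fprintf(stderr, "Can't compare cursors of different containers\n");
        abort();
    }
    if (a->offset < b->offset) { return -1; }
    if (a->offset > b->offset) { return 1; }
    return 0;
}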
Obj*
PolyAnalyzer_Dump_IMP(PolyAnalyzer *self) {
    PolyAnalyzerIVARS *const ivars = PolyAnalyzer_IVARS(self);
    PolyAnalyzer_Dump_t super_dump
        = SUPER_METHOD_PTR(POLYANALYZER, LUCY_PolyAnalyzer_Dump);
    Hash *dump = (Hash*)CERTIFY(super_dump(self), HASH);
    if (ivars->analyzers) {
        Hash_Store_Utf8(dump, "analyzers", 9,
                        Freezer_dump((Obj*)ivars->analyzers));
    }
    return (Obj*)dump;
}
Obj*
ProximityQuery_Dump_IMP(ProximityQuery *self) {
    ProximityQueryIVARS *ivars = ProximityQuery_IVARS(self);
    ProximityQuery_Dump_t super_dump
        = SUPER_METHOD_PTR(PROXIMITYQUERY, LUCY_ProximityQuery_Dump);
    Hash *dump = (Hash*)CERTIFY(super_dump(self), HASH);
    Hash_Store_Utf8(dump, "field", 5, Freezer_dump((Obj*)ivars->field));
    Hash_Store_Utf8(dump, "terms", 5, Freezer_dump((Obj*)ivars->terms));
    Hash_Store_Utf8(dump, "within", 6,
                    (Obj*)Str_newf("%i64", (int64_t)ivars->within));
    return (Obj*)dump;
}
static PhraseQuery*
S_do_init(PhraseQuery *self, CharBuf *field, VArray *terms, float boost) {
    uint32_t i, max;
    Query_init((Query*)self, boost);
    for (i = 0, max = VA_Get_Size(terms); i < max; i++) {
        CERTIFY(VA_Fetch(terms, i), OBJ);
    }
    self->field = field;
    self->terms = terms;
    return self;
}
Obj*
LeafQuery_Dump_IMP(LeafQuery *self) {
    LeafQueryIVARS *ivars = LeafQuery_IVARS(self);
    LeafQuery_Dump_t super_dump
        = SUPER_METHOD_PTR(LEAFQUERY, LUCY_LeafQuery_Dump);
    Hash *dump = (Hash*)CERTIFY(super_dump(self), HASH);
    if (ivars->field) {
        Hash_Store_Utf8(dump, "field", 5, Freezer_dump((Obj*)ivars->field));
    }
    Hash_Store_Utf8(dump, "text", 4, Freezer_dump((Obj*)ivars->text));
    return (Obj*)dump;
}
PolyHighlightReader*
PolyHLReader_init(PolyHighlightReader *self, Vector *readers,
                  I32Array *offsets) {
    HLReader_init((HighlightReader*)self, NULL, NULL, NULL, NULL, -1);
    PolyHighlightReaderIVARS *const ivars = PolyHLReader_IVARS(self);
    for (size_t i = 0, max = Vec_Get_Size(readers); i < max; i++) {
        CERTIFY(Vec_Fetch(readers, i), HIGHLIGHTREADER);
    }
    ivars->readers = (Vector*)INCREF(readers);
    ivars->offsets = (I32Array*)INCREF(offsets);
    return self;
}
static void
S_add_string_field(Schema *self, String *field, FieldType *type) {
    SchemaIVARS *const ivars = Schema_IVARS(self);
    StringType *string_type = (StringType*)CERTIFY(type, STRINGTYPE);
    Similarity *sim = StringType_Make_Similarity(string_type);

    // Cache helpers.
    Hash_Store(ivars->sims, field, (Obj*)sim);

    // Store FieldType.
    Hash_Store(ivars->types, field, INCREF(type));
}
Obj*
SnowStop_Dump_IMP(SnowballStopFilter *self) {
    SnowballStopFilterIVARS *ivars = SnowStop_IVARS(self);
    SnowStop_Dump_t super_dump
        = SUPER_METHOD_PTR(SNOWBALLSTOPFILTER, LUCY_SnowStop_Dump);
    Hash *dump = (Hash*)CERTIFY(super_dump(self), HASH);
    if (ivars->stoplist) {
        Hash_Store_Utf8(dump, "stoplist", 8,
                        Freezer_dump((Obj*)ivars->stoplist));
    }
    return (Obj*)dump;
}
static void
test_Peek_and_Pop_All(TestBatchRunner *runner) {
    NumPriorityQueue *pq = NumPriQ_new(5);
    Float64 *val;

    S_insert_num(pq, 3);
    S_insert_num(pq, 1);
    S_insert_num(pq, 2);
    S_insert_num(pq, 20);
    S_insert_num(pq, 10);

    val = (Float64*)CERTIFY(NumPriQ_Peek(pq), FLOAT64);
    TEST_INT_EQ(runner, (long)Float64_Get_Value(val), 1,
                "peek at the least item in the queue");

    // Pop_All() returns the items ordered from greatest to least.
    VArray *got = NumPriQ_Pop_All(pq);
    val = (Float64*)CERTIFY(VA_Fetch(got, 0), FLOAT64);
    TEST_INT_EQ(runner, (long)Float64_Get_Value(val), 20, "pop_all");
    val = (Float64*)CERTIFY(VA_Fetch(got, 1), FLOAT64);
    TEST_INT_EQ(runner, (long)Float64_Get_Value(val), 10, "pop_all");
    val = (Float64*)CERTIFY(VA_Fetch(got, 2), FLOAT64);
    TEST_INT_EQ(runner, (long)Float64_Get_Value(val), 3, "pop_all");
    val = (Float64*)CERTIFY(VA_Fetch(got, 3), FLOAT64);
    TEST_INT_EQ(runner, (long)Float64_Get_Value(val), 2, "pop_all");
    val = (Float64*)CERTIFY(VA_Fetch(got, 4), FLOAT64);
    TEST_INT_EQ(runner, (long)Float64_Get_Value(val), 1, "pop_all");

    DECREF(got);
    DECREF(pq);
}
static void
test_offsets(TestBatchRunner *runner) {
    Folder *folder = S_folder_with_contents();
    CompoundFileWriter *cf_writer = CFWriter_new(folder);
    Hash *cf_metadata;
    Hash *files;

    CFWriter_Consolidate(cf_writer);

    cf_metadata = (Hash*)CERTIFY(Json_slurp_json(folder, cfmeta_file), HASH);
    files = (Hash*)CERTIFY(Hash_Fetch_Utf8(cf_metadata, "files", 5), HASH);

    bool offsets_ok = true;
    TEST_TRUE(runner, Hash_Get_Size(files) > 0, "Multiple files");

    HashIterator *iter = HashIter_new(files);
    while (HashIter_Next(iter)) {
        String *file   = HashIter_Get_Key(iter);
        Hash   *stats  = (Hash*)CERTIFY(HashIter_Get_Value(iter), HASH);
        Obj    *offset = CERTIFY(Hash_Fetch_Utf8(stats, "offset", 6), OBJ);
        int64_t offs   = Obj_To_I64(offset);
        if (offs % 8 != 0) {
            offsets_ok = false;
            // Report the numeric offset, not the Obj pointer.
            FAIL(runner, "Offset %" PRId64 " for %s not a multiple of 8",
                 offs, Str_Get_Ptr8(file));
            break;
        }
    }
    DECREF(iter);

    if (offsets_ok) {
        PASS(runner, "All offsets are multiples of 8");
    }

    DECREF(cf_metadata);
    DECREF(cf_writer);
    DECREF(folder);
}
static ProximityQuery*
S_do_init(ProximityQuery *self, String *field, VArray *terms, float boost,
          uint32_t within) {
    Query_init((Query*)self, boost);
    ProximityQueryIVARS *const ivars = ProximityQuery_IVARS(self);
    for (uint32_t i = 0, max = VA_Get_Size(terms); i < max; i++) {
        CERTIFY(VA_Fetch(terms, i), OBJ);
    }
    ivars->field  = field;
    ivars->terms  = terms;
    ivars->within = within;
    return self;
}
int32_t
Blob_Compare_To_IMP(Blob *self, Obj *other) {
    Blob *twin = (Blob*)CERTIFY(other, BLOB);
    const size_t size = self->size < twin->size ? self->size : twin->size;
    int32_t comparison = memcmp(self->buf, twin->buf, size);
    if (comparison == 0 && self->size != twin->size) {
        // Shared prefix: the shorter Blob sorts first.
        comparison = self->size < twin->size ? -1 : 1;
    }
    return comparison;
}
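/* A hedged sketch of the comparison above against plain C buffers:
 * memcmp() over the shared prefix, with buffer length as the tiebreaker so
 * that a shorter buffer sorts before any longer buffer it prefixes.
 * (`buf_compare` is a hypothetical helper, not part of Lucy.) */
#include <stddef.h>
#include <string.h>

static int
buf_compare(const void *a, size_t a_size, const void *b, size_t b_size) {
    const size_t size = a_size < b_size ? a_size : b_size;
    int comparison = memcmp(a, b, size);
    if (comparison == 0 && a_size != b_size) {
        comparison = a_size < b_size ? -1 : 1;
    }
    return comparison;
}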
QueryParser*
QParser_init(QueryParser *self, Schema *schema, Analyzer *analyzer,
             String *default_boolop, Vector *fields) {
    QueryParserIVARS *const ivars = QParser_IVARS(self);

    // Init.
    ivars->heed_colons = false;
    ivars->lexer       = QueryLexer_new();

    // Assign.
    ivars->schema   = (Schema*)INCREF(schema);
    ivars->analyzer = (Analyzer*)INCREF(analyzer);
    ivars->default_boolop = default_boolop
                            ? Str_Clone(default_boolop)
                            : Str_new_from_trusted_utf8("OR", 2);

    if (fields) {
        ivars->fields = Vec_Clone(fields);
        for (uint32_t i = 0, max = Vec_Get_Size(fields); i < max; i++) {
            CERTIFY(Vec_Fetch(fields, i), STRING);
        }
    }
    else {
        Vector *all_fields = Schema_All_Fields(schema);
        uint32_t num_fields = Vec_Get_Size(all_fields);
        ivars->fields = Vec_new(num_fields);
        for (uint32_t i = 0; i < num_fields; i++) {
            String *field = (String*)Vec_Fetch(all_fields, i);
            FieldType *type = Schema_Fetch_Type(schema, field);
            if (type && FType_Indexed(type)) {
                Vec_Push(ivars->fields, INCREF(field));
            }
        }
        DECREF(all_fields);
    }
    Vec_Sort(ivars->fields);

    // Derive default "occur" from default boolean operator.
    if (Str_Equals_Utf8(ivars->default_boolop, "OR", 2)) {
        ivars->default_occur = SHOULD;
    }
    else if (Str_Equals_Utf8(ivars->default_boolop, "AND", 3)) {
        ivars->default_occur = MUST;
    }
    else {
        THROW(ERR, "Invalid value for default_boolop: %o",
              ivars->default_boolop);
    }

    return self;
}
Obj*
Hash_load(Hash *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    CharBuf *class_name = (CharBuf*)Hash_Fetch_Str(source, "_class", 6);
    UNUSED_VAR(self);

    // Assume that the presence of the "_class" key paired with a valid
    // class name indicates the output of a Dump() rather than an ordinary
    // Hash.
    if (class_name && CB_Is_A(class_name, CHARBUF)) {
        VTable *vtable = VTable_fetch_vtable(class_name);

        if (!vtable) {
            CharBuf *parent_class = VTable_find_parent_class(class_name);
            if (parent_class) {
                VTable *parent = VTable_singleton(parent_class, NULL);
                vtable = VTable_singleton(class_name, parent);
                DECREF(parent_class);
            }
            else {
                // TODO: Fix Hash_Load() so that it works with ordinary
                // hash keys named "_class".
                THROW(ERR, "Can't find class '%o'", class_name);
            }
        }

        // Dispatch to an alternate Load() method.
        if (vtable) {
            Obj_Load_t load = METHOD_PTR(vtable, Lucy_Obj_Load);
            if (load == Obj_load) {
                THROW(ERR, "Abstract method Load() not defined for %o",
                      VTable_Get_Name(vtable));
            }
            else if (load != (Obj_Load_t)Hash_load) { // stop inf loop
                return VTable_Load_Obj(vtable, dump);
            }
        }
    }

    // It's an ordinary Hash.
    Hash *loaded = Hash_new(source->size);
    Obj *key;
    Obj *value;
    Hash_Iterate(source);
    while (Hash_Next(source, &key, &value)) {
        Hash_Store(loaded, key, Obj_Load(value, value));
    }

    return (Obj*)loaded;
}
void
BBSortEx_Feed_IMP(BBSortEx *self, Obj *item) {
    BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
    BBSortEx_Feed_t super_feed
        = SUPER_METHOD_PTR(BBSORTEX, LUCY_BBSortEx_Feed);
    super_feed(self, item);

    // Flush() if necessary.
    ByteBuf *bytebuf = (ByteBuf*)CERTIFY(item, BYTEBUF);
    ivars->mem_consumed += BB_Get_Size(bytebuf);
    if (ivars->mem_consumed >= ivars->mem_thresh) {
        BBSortEx_Flush(self);
    }
}
void
BitVec_mimic(BitVector *self, Obj *other) {
    BitVector *twin = (BitVector*)CERTIFY(other, BITVECTOR);
    const uint32_t my_byte_size   = (uint32_t)ceil(self->cap / 8.0);
    const uint32_t twin_byte_size = (uint32_t)ceil(twin->cap / 8.0);
    if (my_byte_size > twin_byte_size) {
        uint32_t space = my_byte_size - twin_byte_size;
        memset(self->bits + twin_byte_size, 0, space);
    }
    else if (my_byte_size < twin_byte_size) {
        BitVec_Grow(self, twin->cap - 1);
    }
    memcpy(self->bits, twin->bits, twin_byte_size);
}
void
BlobSortEx_Feed_IMP(BlobSortEx *self, Obj *item) {
    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);
    BlobSortEx_Feed_t super_feed
        = SUPER_METHOD_PTR(BLOBSORTEX, LUCY_BlobSortEx_Feed);
    super_feed(self, item);

    // Flush() if necessary.
    Blob *blob = (Blob*)CERTIFY(item, BLOB);
    ivars->mem_consumed += Blob_Get_Size(blob);
    if (ivars->mem_consumed >= ivars->mem_thresh) {
        BlobSortEx_Flush(self);
    }
}
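/* The Feed() variants above share one pattern: account for the bytes each
 * fed item consumes, then flush once a threshold is crossed.  A minimal
 * sketch of that accounting under assumed names (`Accumulator`, `acc_feed`,
 * and `acc_flush` are hypothetical, not Lucy API): */
#include <stddef.h>

typedef struct {
    size_t mem_consumed;  // Bytes buffered since the last flush.
    size_t mem_thresh;    // Flush once this many bytes are buffered.
} Accumulator;

static void
acc_flush(Accumulator *acc) {
    // The real sorters write their buffered run to disk here; this sketch
    // only resets the accounting.
    acc->mem_consumed = 0;
}

static void
acc_feed(Accumulator *acc, size_t item_size) {
    acc->mem_consumed += item_size;
    if (acc->mem_consumed >= acc->mem_thresh) {
        acc_flush(acc);
    }
}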
VArray*
IxManager_recycle(IndexManager *self, PolyReader *reader,
                  DeletionsWriter *del_writer, int64_t cutoff,
                  bool_t optimize) {
    VArray *seg_readers = PolyReader_Get_Seg_Readers(reader);
    VArray *candidates  = VA_Gather(seg_readers, S_check_cutoff, &cutoff);
    VArray *recyclables = VA_new(VA_Get_Size(candidates));
    const uint32_t num_candidates = VA_Get_Size(candidates);

    if (optimize) {
        DECREF(recyclables);
        return candidates;
    }

    // Sort by ascending size in docs, choose sparsely populated segments.
    VA_Sort(candidates, S_compare_doc_count, NULL);
    int32_t *counts = (int32_t*)MALLOCATE(num_candidates * sizeof(int32_t));
    for (uint32_t i = 0; i < num_candidates; i++) {
        SegReader *seg_reader
            = (SegReader*)CERTIFY(VA_Fetch(candidates, i), SEGREADER);
        counts[i] = SegReader_Doc_Count(seg_reader);
    }
    I32Array *doc_counts = I32Arr_new_steal(counts, num_candidates);
    uint32_t threshold = IxManager_Choose_Sparse(self, doc_counts);
    DECREF(doc_counts);

    // Move SegReaders to be recycled.
    for (uint32_t i = 0; i < threshold; i++) {
        VA_Store(recyclables, i, VA_Delete(candidates, i));
    }

    // Find segments where at least 10% of all docs have been deleted.
    for (uint32_t i = threshold; i < num_candidates; i++) {
        SegReader *seg_reader = (SegReader*)VA_Delete(candidates, i);
        CharBuf *seg_name = SegReader_Get_Seg_Name(seg_reader);
        double doc_max = SegReader_Doc_Max(seg_reader);
        double num_deletions = DelWriter_Seg_Del_Count(del_writer, seg_name);
        double del_proportion = num_deletions / doc_max;
        if (del_proportion >= 0.1) {
            VA_Push(recyclables, (Obj*)seg_reader);
        }
        else {
            DECREF(seg_reader);
        }
    }

    DECREF(candidates);
    return recyclables;
}
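/* The 10% rule from the second loop above, in isolation: a segment is worth
 * recycling once at least one document in ten has been deleted.
 * (`seg_is_sparse` is a hypothetical helper for illustration only.) */
#include <stdbool.h>

static bool
seg_is_sparse(double doc_max, double num_deletions) {
    return doc_max > 0.0 && (num_deletions / doc_max) >= 0.1;
}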
PolyLexiconReader*
PolyLexReader_init(PolyLexiconReader *self, VArray *readers,
                   I32Array *offsets) {
    Schema *schema = NULL;
    for (uint32_t i = 0, max = VA_Get_Size(readers); i < max; i++) {
        LexiconReader *reader
            = (LexiconReader*)CERTIFY(VA_Fetch(readers, i), LEXICONREADER);
        if (!schema) { schema = LexReader_Get_Schema(reader); }
    }
    LexReader_init((LexiconReader*)self, schema, NULL, NULL, NULL, -1);
    PolyLexiconReaderIVARS *const ivars = PolyLexReader_IVARS(self);
    ivars->readers = (VArray*)INCREF(readers);
    ivars->offsets = (I32Array*)INCREF(offsets);
    return self;
}
Obj*
I64SortCache_value(Int64SortCache *self, int32_t ord, Obj *blank) {
    if (ord == self->null_ord) {
        return NULL;
    }
    else if (ord < 0) {
        THROW(ERR, "Ordinal less than 0 for %o: %i32", self->field, ord);
    }
    else {
        Integer64 *int_blank = (Integer64*)CERTIFY(blank, INTEGER64);
        InStream_Seek(self->dat_in, ord * sizeof(int64_t));
        Int64_Set_Value(int_blank, InStream_Read_I64(self->dat_in));
    }
    return blank;
}
Obj*
F32SortCache_value(Float32SortCache *self, int32_t ord, Obj *blank) {
    if (ord == self->null_ord) {
        return NULL;
    }
    else if (ord < 0) {
        THROW(ERR, "Ordinal less than 0 for %o: %i32", self->field, ord);
    }
    else {
        Float32 *num_blank = (Float32*)CERTIFY(blank, FLOAT32);
        InStream_Seek(self->dat_in, ord * sizeof(float));
        Float32_Set_Value(num_blank, InStream_Read_F32(self->dat_in));
    }
    return blank;
}
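/* Both SortCache value readers above map an ordinal to a file position by
 * fixed-width record arithmetic: seek to ord * record width, then read one
 * value.  The rule in isolation (`record_offset` is a hypothetical helper): */
#include <stddef.h>
#include <stdint.h>

static int64_t
record_offset(int32_t ord, size_t record_width) {
    // E.g. ord 3 with 8-byte records (int64_t or double) lands at byte 24.
    return (int64_t)ord * (int64_t)record_width;
}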
static void
S_add_text_field(Schema *self, String *field, FieldType *type) {
    SchemaIVARS *const ivars = Schema_IVARS(self);
    FullTextType *fttype = (FullTextType*)CERTIFY(type, FULLTEXTTYPE);
    Similarity *sim = FullTextType_Make_Similarity(fttype);
    Analyzer *analyzer = FullTextType_Get_Analyzer(fttype);

    // Cache helpers.
    Hash_Store(ivars->sims, field, (Obj*)sim);
    Hash_Store(ivars->analyzers, field, INCREF(analyzer));
    S_add_unique(ivars->uniq_analyzers, (Obj*)analyzer);

    // Store FieldType.
    Hash_Store(ivars->types, field, INCREF(type));
}
Doc*
Doc_init(Doc *self, void *fields, int32_t doc_id) {
    DocIVARS *const ivars = Doc_IVARS(self);
    Hash *hash;
    if (fields) {
        hash = (Hash*)INCREF(CERTIFY(fields, HASH));
    }
    else {
        hash = Hash_new(0);
    }
    ivars->fields = hash;
    ivars->doc_id = doc_id;
    return self;
}
void
BitVec_Mimic_IMP(BitVector *self, Obj *other) {
    CERTIFY(other, BITVECTOR);
    BitVectorIVARS *const ivars = BitVec_IVARS(self);
    BitVectorIVARS *const ovars = BitVec_IVARS((BitVector*)other);
    const size_t my_byte_size    = SI_octet_size(ivars->cap);
    const size_t other_byte_size = SI_octet_size(ovars->cap);
    if (my_byte_size > other_byte_size) {
        size_t space = my_byte_size - other_byte_size;
        memset(ivars->bits + other_byte_size, 0, space);
    }
    else if (my_byte_size < other_byte_size) {
        BitVec_Grow(self, ovars->cap - 1);
    }
    memcpy(ivars->bits, ovars->bits, other_byte_size);
}
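/* Both Mimic implementations round a bit capacity up to whole bytes: the
 * older one with ceil(cap / 8.0), the newer one via SI_octet_size().
 * Assuming SI_octet_size() is the usual integer round-up, the two agree: */
#include <stddef.h>

static size_t
octet_size(size_t bit_cap) {
    return (bit_cap + 7) / 8;  // e.g. 1..8 bits -> 1 byte, 9 bits -> 2.
}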
Hash*
Hash_dump(Hash *self) {
    Hash *dump = Hash_new(self->size);
    Obj *key;
    Obj *value;

    Hash_Iterate(self);
    while (Hash_Next(self, &key, &value)) {
        // Since JSON only supports text hash keys, Dump() can only support
        // text hash keys.
        CERTIFY(key, CHARBUF);
        Hash_Store(dump, key, Obj_Dump(value));
    }

    return dump;
}