// Check that `obj` is a non-NULL instance of `vtable`.
//
// Reports through Err_throw_at() (tagged with the caller's file, line and
// function) when `obj` is NULL or when SI_obj_is_a() rejects it.  Otherwise
// `obj` is handed back unchanged so the call can wrap a cast.
Obj* Err_certify(Obj *obj, VTable *vtable, const char *file, int line, const char *func) {
    if (obj != NULL) {
        if (!SI_obj_is_a(obj, vtable)) {
            Err_throw_at(ERR, file, line, func,
                         "Can't downcast from %o to %o",
                         Obj_Get_Class_Name(obj), VTable_Get_Name(vtable));
        }
    } else {
        Err_throw_at(ERR, file, line, func,
                     "Object isn't a %o, it's NULL",
                     VTable_Get_Name(vtable));
    }
    return obj;
}
// Assert that `obj` is a non-NULL instance of `vtable`.
//
// A NULL `obj`, or one for which Obj_Is_A() is false, is reported through
// Err_throw_at() with the caller's file, line and function.  Returns `obj`
// unchanged.
Obj* kino_Err_assert_is_a(Obj *obj, VTable *vtable, const char *file, int line, const char *func) {
    if (obj != NULL) {
        if (!Obj_Is_A(obj, vtable)) {
            Err_throw_at(file, line, func,
                         "Object isn't a %o, it's a %o",
                         VTable_Get_Name(vtable), Obj_Get_Class_Name(obj));
        }
    } else {
        Err_throw_at(file, line, func,
                     "Object isn't a %o, it's NULL",
                     VTable_Get_Name(vtable));
    }
    return obj;
}
// Prepare the collector for a new segment: reset the threshold state,
// restore the automatic action list, refresh the per-rule sort caches and
// derived actions, then pass the reader on to Coll_set_reader().
//
// `reader` may be NULL.  In that case no SortReader is looked up, the
// per-rule caches are left untouched, and seg_doc_max is set to 0.
void SortColl_set_reader(SortCollector *self, SegReader *reader) {
    SortCollectorIVARS *const ivars = SortColl_IVARS(self);

    // Guard this lookup the same way the Doc_Max lookup below is guarded, so
    // a NULL reader is never used as the receiver of a method call.
    SortReader *sort_reader
        = reader
          ? (SortReader*)SegReader_Fetch(reader, VTable_Get_Name(SORTREADER))
          : NULL;

    // Reset threshold variables and trigger auto-action behavior.
    MatchDocIVARS *const bumped_ivars = MatchDoc_IVARS(ivars->bumped);
    bumped_ivars->doc_id = INT32_MAX;
    ivars->bubble_doc    = INT32_MAX;
    bumped_ivars->score  = ivars->need_score ? F32_NEGINF : F32_NAN;
    ivars->bubble_score  = ivars->need_score ? F32_NEGINF : F32_NAN;
    ivars->actions       = ivars->auto_actions;

    // Obtain sort caches.  Derive actions array for this segment.
    if (ivars->need_values && sort_reader) {
        for (uint32_t i = 0, max = ivars->num_rules; i < max; i++) {
            SortRule  *rule  = (SortRule*)VA_Fetch(ivars->rules, i);
            CharBuf   *field = SortRule_Get_Field(rule);
            // A rule without a field has no cache to consult.
            SortCache *cache
                = field
                  ? SortReader_Fetch_Sort_Cache(sort_reader, field)
                  : NULL;
            ivars->sort_caches[i]     = cache;
            ivars->derived_actions[i] = S_derive_action(rule, cache);
            ivars->ord_arrays[i]      = cache ? SortCache_Get_Ords(cache) : NULL;
        }
    }

    ivars->seg_doc_max = reader ? SegReader_Doc_Max(reader) : 0;
    Coll_set_reader((Collector*)self, reader);
}
// Return a Matcher over the deletions recorded for `seg_reader`'s segment,
// or NULL when there is nothing to report.
//
// The segment is located by name in name_to_tick.  A BitVecMatcher is built
// only if the segment was marked updated or its DeletionsReader reports a
// non-zero deletion count.  An unknown segment name is reported via THROW.
Matcher* DefDelWriter_seg_deletions(DefaultDeletionsWriter *self, SegReader *seg_reader) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    CharBuf   *seg_name = Seg_Get_Name(SegReader_Get_Segment(seg_reader));
    Integer32 *tick_obj
        = (Integer32*)Hash_Fetch(ivars->name_to_tick, (Obj*)seg_name);
    Matcher   *result   = NULL;

    if (!tick_obj) {
        // Sanity check.
        THROW(ERR, "Couldn't find SegReader %o", seg_reader);
    } else {
        int32_t    tick      = Int32_Get_Value(tick_obj);
        SegReader *candidate = (SegReader*)VA_Fetch(ivars->seg_readers, tick);
        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Obtain(
                  candidate, VTable_Get_Name(DELETIONSREADER));
        if (ivars->updated[tick] || DelReader_Del_Count(del_reader)) {
            BitVector *deldocs = (BitVector*)VA_Fetch(ivars->bit_vecs, tick);
            result = (Matcher*)BitVecMatcher_new(deldocs);
        }
    }

    return result;
}
// Build a RangeMatcher for one segment, or return NULL when the segment
// cannot match.
//
// NULL is returned when the segment has no SortReader, when no sort cache
// exists for the parent query's field, or when the computed bounds fall
// outside the cache (lower > cardinality + 1, or upper < 0).
// `need_score` is ignored.
Matcher* RangeCompiler_make_matcher(RangeCompiler *self, SegReader *reader, bool_t need_score) {
    RangeQuery *parent      = (RangeQuery*)self->parent;
    SortReader *sort_reader
        = (SortReader*)SegReader_Fetch(reader, VTable_Get_Name(SORTREADER));
    SortCache  *sort_cache  = NULL;
    if (sort_reader) {
        sort_cache = SortReader_Fetch_Sort_Cache(sort_reader, parent->field);
    }
    UNUSED_VAR(need_score);

    if (!sort_cache) {
        return NULL;
    }

    int32_t lower   = S_find_lower_bound(self, sort_cache);
    int32_t upper   = S_find_upper_bound(self, sort_cache);
    int32_t max_ord = SortCache_Get_Cardinality(sort_cache) + 1;
    if (lower > max_ord || upper < 0) {
        return NULL;
    }

    int32_t doc_max = SegReader_Doc_Max(reader);
    return (Matcher*)RangeMatcher_new(lower, upper, sort_cache, doc_max);
}
// Initialize a SegReader: run the IndexReader initializer, cache the
// segment's doc count, name and number, load the components under
// Err_trap(), and record the deletion count.
//
// If component initialization traps an error, `self` is released and the
// error is rethrown.  Returns `self` on success.
SegReader* SegReader_init(SegReader *self, Schema *schema, Folder *folder, Snapshot *snapshot, VArray *segments, int32_t seg_tick) {
    IxReader_init((IndexReader*)self, schema, folder, snapshot, segments,
                  seg_tick, NULL);
    SegReaderIVARS *const ivars = SegReader_IVARS(self);

    Segment *segment = SegReader_Get_Segment(self);
    ivars->doc_max  = (int32_t)Seg_Get_Count(segment);
    ivars->seg_name = (CharBuf*)INCREF(Seg_Get_Name(segment));
    ivars->seg_num  = Seg_Get_Number(segment);

    Err *error = Err_trap(S_try_init_components, self);
    if (error != NULL) {
        // An error occurred, so clean up self and rethrow the exception.
        DECREF(self);
        RETHROW(error);
    }

    // A segment without a DeletionsReader component has no deletions.
    DeletionsReader *del_reader
        = (DeletionsReader*)Hash_Fetch(
              ivars->components, (Obj*)VTable_Get_Name(DELETIONSREADER));
    if (del_reader) {
        ivars->del_count = DelReader_Del_Count(del_reader);
    } else {
        ivars->del_count = 0;
    }

    return self;
}
// Mark every document containing `term` in `field` as deleted, across all
// segment readers held by this writer.
//
// For each segment, the posting list for the term is walked and the matching
// bit is set in that segment's BitVector.  The segment is flagged as updated
// only if at least one bit was not already set.
void DefDelWriter_delete_by_term(DefaultDeletionsWriter *self, const CharBuf *field, Obj *term) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    const uint32_t num_readers = VA_Get_Size(ivars->seg_readers);

    for (uint32_t tick = 0; tick < num_readers; tick++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(ivars->seg_readers, tick);
        PostingListReader *plist_reader
            = (PostingListReader*)SegReader_Fetch(
                  seg_reader, VTable_Get_Name(POSTINGLISTREADER));
        BitVector *bit_vec = (BitVector*)VA_Fetch(ivars->bit_vecs, tick);

        PostingList *plist = NULL;
        if (plist_reader) {
            plist = PListReader_Posting_List(plist_reader, field, term);
        }
        if (!plist) {
            continue;
        }

        // Iterate through postings, marking each doc as deleted.
        int32_t newly_deleted = 0;
        for (int32_t doc_id = PList_Next(plist);
             doc_id != 0;
             doc_id = PList_Next(plist)
            ) {
            if (!BitVec_Get(bit_vec, doc_id)) {
                newly_deleted++;
            }
            BitVec_Set(bit_vec, doc_id);
        }
        if (newly_deleted) {
            ivars->updated[tick] = true;
        }
        DECREF(plist);
    }
}
// Merge the sort caches of an existing segment into this writer.
//
// For every schema field that has a sort cache in `reader`, the matching
// SortFieldWriter is lazily created and given the segment, and
// flush_at_finish is set.  Segments without a SortReader contribute nothing.
void SortWriter_add_segment(SortWriter *self, SegReader *reader, I32Array *doc_map) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);

    // The SortReader does not depend on the field, so fetch it once instead
    // of once per field.  Without one there is no cache for any field, so
    // skip building the field list altogether.
    SortReader *sort_reader
        = (SortReader*)SegReader_Fetch(reader, VTable_Get_Name(SORTREADER));
    if (!sort_reader) {
        return;
    }

    VArray *fields = Schema_All_Fields(ivars->schema);

    // Proceed field-at-a-time, rather than doc-at-a-time.
    for (uint32_t i = 0, max = VA_Get_Size(fields); i < max; i++) {
        CharBuf   *field = (CharBuf*)VA_Fetch(fields, i);
        SortCache *cache = SortReader_Fetch_Sort_Cache(sort_reader, field);
        if (cache) {
            int32_t field_num = Seg_Field_Num(ivars->segment, field);
            SortFieldWriter *field_writer
                = S_lazy_init_field_writer(self, field_num);
            SortFieldWriter_Add_Segment(field_writer, reader, doc_map, cache);
            ivars->flush_at_finish = true;
        }
    }

    DECREF(fields);
}
// Build a Matcher for the parent TermQuery's term within one segment.
//
// Returns NULL when the segment has no PostingListReader, when no posting
// list exists for the field/term pair, or when the posting list reports a
// doc freq of 0.  The posting list is released before returning.
Matcher* TermCompiler_Make_Matcher_IMP(TermCompiler *self, SegReader *reader, bool need_score) {
    TermCompilerIVARS *const ivars = TermCompiler_IVARS(self);
    TermQueryIVARS *const parent_ivars
        = TermQuery_IVARS((TermQuery*)ivars->parent);
    PostingListReader *plist_reader
        = (PostingListReader*)SegReader_Fetch(
              reader, VTable_Get_Name(POSTINGLISTREADER));

    PostingList *plist = NULL;
    if (plist_reader) {
        plist = PListReader_Posting_List(plist_reader, parent_ivars->field,
                                         parent_ivars->term);
    }

    Matcher *matcher = NULL;
    if (plist != NULL && PList_Get_Doc_Freq(plist) != 0) {
        matcher = PList_Make_Matcher(plist, ivars->sim, (Compiler*)self,
                                     need_score);
    }
    DECREF(plist);
    return matcher;
}
// Absorb the contents of another index into the one being written.
//
// `index` must be either a Folder or a CharBuf; a CharBuf is used to
// construct an FSFolder.  Any other type is reported via THROW, as is an
// index for which IxReader_open() returns NULL.
//
// The other index's schema is merged into ours with Schema_Eat(), its fields
// are registered with the current segment, and each of its segments is
// passed to the SegWriter together with a doc map generated from that
// segment's deletions.
void Indexer_add_index(Indexer *self, Obj *index) {
    Folder      *other_folder = NULL;
    IndexReader *reader       = NULL;

    if (Obj_Is_A(index, FOLDER)) {
        other_folder = (Folder*)INCREF(index);
    } else if (Obj_Is_A(index, CHARBUF)) {
        other_folder = (Folder*)FSFolder_new((CharBuf*)index);
    } else {
        THROW(ERR, "Invalid type for 'index': %o", Obj_Get_Class_Name(index));
    }

    reader = IxReader_open((Obj*)other_folder, NULL, NULL);
    if (reader == NULL) {
        // Drop our reference to the folder before throwing, otherwise it
        // leaks: the cleanup at the bottom is not reached on this path.
        DECREF(other_folder);
        other_folder = NULL;
        THROW(ERR, "Index doesn't seem to contain any data");
    } else {
        Schema *schema       = self->schema;
        Schema *other_schema = IxReader_Get_Schema(reader);
        VArray *other_fields = Schema_All_Fields(other_schema);
        VArray *seg_readers  = IxReader_Seg_Readers(reader);
        uint32_t i, max;

        // Validate schema compatibility and add fields.
        Schema_Eat(schema, other_schema);

        // Add fields to Segment.
        for (i = 0, max = VA_Get_Size(other_fields); i < max; i++) {
            CharBuf *other_field = (CharBuf*)VA_Fetch(other_fields, i);
            Seg_Add_Field(self->segment, other_field);
        }
        DECREF(other_fields);

        // Add all segments.
        for (i = 0, max = VA_Get_Size(seg_readers); i < max; i++) {
            SegReader *seg_reader = (SegReader*)VA_Fetch(seg_readers, i);
            DeletionsReader *del_reader
                = (DeletionsReader*)SegReader_Fetch(
                      seg_reader, VTable_Get_Name(DELETIONSREADER));
            // No DeletionsReader means there are no deletions to apply.
            Matcher *deletions
                = del_reader ? DelReader_Iterator(del_reader) : NULL;
            I32Array *doc_map
                = DelWriter_Generate_Doc_Map(
                      self->del_writer, deletions,
                      SegReader_Doc_Max(seg_reader),
                      (int32_t)Seg_Get_Count(self->segment));
            SegWriter_Add_Segment(self->seg_writer, seg_reader, doc_map);
            DECREF(deletions);
            DECREF(doc_map);
        }
        DECREF(seg_readers);
    }

    DECREF(reader);
    DECREF(other_folder);
}
// Check a downcast of `obj` to `vtable`.
//
// A NULL `obj` passes through untouched.  A non-NULL `obj` that fails
// SI_obj_is_a() is reported through Err_throw_at() with the caller's file,
// line and function.  Returns `obj` unchanged.
Obj* Err_downcast(Obj *obj, VTable *vtable, const char *file, int line, const char *func) {
    if (obj != NULL) {
        if (!SI_obj_is_a(obj, vtable)) {
            Err_throw_at(ERR, file, line, func,
                         "Can't downcast from %o to %o",
                         Obj_Get_Class_Name(obj), VTable_Get_Name(vtable));
        }
    }
    return obj;
}
// Install `sub_readers` as this PolyReader's segment readers and rebuild
// everything derived from them: the total doc_max, the per-segment start
// offsets, the aggregated DataReader components, and the deletion count.
static void S_init_sub_readers(PolyReader *self, VArray *sub_readers) {
    PolyReaderIVARS *const ivars = PolyReader_IVARS(self);
    uint32_t  num_sub_readers = VA_Get_Size(sub_readers);
    int32_t  *starts
        = (int32_t*)MALLOCATE(num_sub_readers * sizeof(int32_t));
    Hash     *data_readers    = Hash_new(0);

    DECREF(ivars->sub_readers);
    DECREF(ivars->offsets);
    ivars->sub_readers = (VArray*)INCREF(sub_readers);

    // Accumulate doc_max, subreader start offsets, and DataReaders.
    ivars->doc_max = 0;
    for (uint32_t tick = 0; tick < num_sub_readers; tick++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(sub_readers, tick);
        Hash      *components = SegReader_Get_Components(seg_reader);

        starts[tick]    = ivars->doc_max;
        ivars->doc_max += SegReader_Doc_Max(seg_reader);

        // Group each component under its API name, slotted by segment tick.
        CharBuf    *comp_api;
        DataReader *component;
        Hash_Iterate(components);
        while (Hash_Next(components, (Obj**)&comp_api, (Obj**)&component)) {
            VArray *slots = (VArray*)Hash_Fetch(data_readers, (Obj*)comp_api);
            if (slots == NULL) {
                slots = VA_new(num_sub_readers);
                Hash_Store(data_readers, (Obj*)comp_api, (Obj*)slots);
            }
            VA_Store(slots, tick, INCREF(component));
        }
    }
    ivars->offsets = I32Arr_new_steal(starts, num_sub_readers);

    // Ask the first available reader of each API for an aggregator spanning
    // all segments; keep it only if one is supplied.
    CharBuf *api;
    VArray  *readers;
    Hash_Iterate(data_readers);
    while (Hash_Next(data_readers, (Obj**)&api, (Obj**)&readers)) {
        DataReader *datareader
            = (DataReader*)CERTIFY(S_first_non_null(readers), DATAREADER);
        DataReader *aggregator
            = DataReader_Aggregator(datareader, readers, ivars->offsets);
        if (aggregator != NULL) {
            CERTIFY(aggregator, DATAREADER);
            Hash_Store(ivars->components, (Obj*)api, (Obj*)aggregator);
        }
    }
    DECREF(data_readers);

    // Without an aggregated DeletionsReader the deletion count is 0.
    DeletionsReader *del_reader
        = (DeletionsReader*)Hash_Fetch(
              ivars->components, (Obj*)VTable_Get_Name(DELETIONSREADER));
    if (del_reader) {
        ivars->del_count = DelReader_Del_Count(del_reader);
    } else {
        ivars->del_count = 0;
    }
}
// Reconstitute an object from a dumped Hash.
//
// `dump` must be a Hash (enforced with CERTIFY).  If it holds a "_class"
// entry that is a CharBuf, the named class is looked up (or created from its
// parent class) and loading is dispatched to that class's Load() method.
// Otherwise a new Hash is built whose values are the result of Obj_Load() on
// each source value.  `self` is unused.
Obj* Hash_load(Hash *self, Obj *dump) {
    Hash    *source     = (Hash*)CERTIFY(dump, HASH);
    CharBuf *class_name = (CharBuf*)Hash_Fetch_Str(source, "_class", 6);
    UNUSED_VAR(self);

    // Assume that the presence of the "_class" key paired with a valid class
    // name indicates the output of a Dump rather than an ordinary Hash.
    if (class_name && CB_Is_A(class_name, CHARBUF)) {
        VTable *vtable = VTable_fetch_vtable(class_name);

        if (vtable == NULL) {
            CharBuf *parent_class = VTable_find_parent_class(class_name);
            if (parent_class == NULL) {
                // TODO: Fix Hash_Load() so that it works with ordinary hash
                // keys named "_class".
                THROW(ERR, "Can't find class '%o'", class_name);
            } else {
                VTable *parent = VTable_singleton(parent_class, NULL);
                vtable = VTable_singleton(class_name, parent);
                DECREF(parent_class);
            }
        }

        // Dispatch to an alternate Load() method.
        if (vtable != NULL) {
            Obj_Load_t load = METHOD_PTR(vtable, Lucy_Obj_Load);
            if (load == Obj_load) {
                THROW(ERR, "Abstract method Load() not defined for %o",
                      VTable_Get_Name(vtable));
            } else if (load != (Obj_Load_t)Hash_load) {
                // Dispatching back to Hash_load would loop forever, so only
                // hand off when the class supplies a different Load().
                return VTable_Load_Obj(vtable, dump);
            }
        }
    }

    // It's an ordinary Hash.
    Hash *loaded = Hash_new(source->size);
    Obj  *key;
    Obj  *value;
    Hash_Iterate(source);
    while (Hash_Next(source, &key, &value)) {
        Hash_Store(loaded, key, Obj_Load(value, value));
    }

    return (Obj*)loaded;
}
// Exercise Is_A, Get_VTable and Get_Class_Name on a freshly created CharBuf.
static void test_Is_A(TestBatch *batch) {
    CharBuf *charbuf    = CB_new(0);
    VTable  *cb_vtable  = CB_Get_VTable(charbuf);
    CharBuf *class_name = CB_Get_Class_Name(charbuf);

    TEST_TRUE(batch, CB_Is_A(charbuf, CHARBUF), "CharBuf Is_A CharBuf.");
    TEST_TRUE(batch, CB_Is_A(charbuf, OBJ), "CharBuf Is_A Obj.");
    TEST_TRUE(batch, cb_vtable == CHARBUF, "Get_VTable");
    TEST_TRUE(batch,
              CB_Equals(VTable_Get_Name(CHARBUF), (Obj*)class_name),
              "Get_Class_Name");

    DECREF(charbuf);
}
// Deserialize an Obj from `instream`.
//
// A class name is read from the stream first.  When `self` is NULL, a new
// object of that class is made (the class is registered with OBJ as parent
// through VTable_singleton).  When `self` is supplied, its class name must
// equal the one read from the stream, otherwise the mismatch is reported via
// THROW.  Returns the result of Obj_init() on the object.
Obj* Obj_deserialize(Obj *self, InStream *instream) {
    CharBuf *class_name = CB_deserialize(NULL, instream);

    if (self != NULL) {
        CharBuf *my_class = VTable_Get_Name(self->vtable);
        if (!CB_Equals(class_name, (Obj*)my_class)) {
            THROW("Class mismatch: %o %o", class_name, my_class);
        }
    } else {
        VTable *vtable = VTable_singleton(class_name, (VTable*)&OBJ);
        self = VTable_Make_Obj(vtable);
    }

    DECREF(class_name);
    return Obj_init(self);
}
// Build a PhraseScorer for one segment, or return NULL when the phrase
// cannot match there.
//
// NULL is returned when the query has no terms, when the Similarity's
// posting is missing or is not a SCOREPOSTING, when the segment has no
// PostingListReader, or when any term lacks a posting list with a non-zero
// doc freq.  `need_score` is ignored.
Matcher* PhraseCompiler_make_matcher(PhraseCompiler *self, SegReader *reader, bool_t need_score) {
    UNUSED_VAR(need_score);
    PhraseQuery *const parent = (PhraseQuery*)self->parent;
    VArray *const terms       = parent->terms;
    uint32_t      num_terms   = VA_Get_Size(terms);

    // Bail if there are no terms.
    if (num_terms == 0) {
        return NULL;
    }

    // Bail unless field is valid and posting type supports positions.
    Similarity *sim     = PhraseCompiler_Get_Similarity(self);
    Posting    *posting = Sim_Make_Posting(sim);
    int has_positions
        = posting != NULL && Obj_Is_A((Obj*)posting, SCOREPOSTING);
    DECREF(posting);
    if (!has_positions) {
        return NULL;
    }

    // Bail if there's no PostingListReader for this segment.
    PostingListReader *const plist_reader
        = (PostingListReader*)SegReader_Fetch(
              reader, VTable_Get_Name(POSTINGLISTREADER));
    if (plist_reader == NULL) {
        return NULL;
    }

    // Look up each term, stopping at the first one missing from the index.
    VArray  *plists = VA_new(num_terms);
    uint32_t found  = 0;
    while (found < num_terms) {
        Obj *term = VA_Fetch(terms, found);
        PostingList *plist
            = PListReader_Posting_List(plist_reader, parent->field, term);
        if (!plist || !PList_Get_Doc_Freq(plist)) {
            DECREF(plist);
            break;
        }
        VA_Push(plists, (Obj*)plist);
        found++;
    }

    // Only a complete set of posting lists can produce a scorer.
    Matcher *retval = NULL;
    if (found == num_terms) {
        retval = (Matcher*)PhraseScorer_new(sim, plists, (Compiler*)self);
    }
    DECREF(plists);
    return retval;
}
// React to a segment being merged away by checking the deletions files it
// carries for other segments.
//
// The segment's "deletions" metadata lists, per target segment name, a
// "count".  For each target that this writer still tracks, if that count
// equals the target's current deletion count, the target is marked as
// updated.  `doc_map` is unused.
void DefDelWriter_merge_segment(DefaultDeletionsWriter *self, SegReader *reader, I32Array *doc_map) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    UNUSED_VAR(doc_map);

    Segment *segment  = SegReader_Get_Segment(reader);
    Hash    *del_meta
        = (Hash*)Seg_Fetch_Metadata_Str(segment, "deletions", 9);
    if (del_meta == NULL) {
        return;
    }

    VArray *seg_readers = ivars->seg_readers;
    Hash   *files       = (Hash*)Hash_Fetch_Str(del_meta, "files", 5);
    if (files == NULL) {
        return;
    }

    CharBuf *seg;
    Hash    *mini_meta;
    Hash_Iterate(files);
    while (Hash_Next(files, (Obj**)&seg, (Obj**)&mini_meta)) {
        /* Find the segment the deletions from the SegReader we're adding
         * correspond to.  If it's gone, we don't need to worry about losing
         * deletions files that point at it. */
        const uint32_t num_readers = VA_Get_Size(seg_readers);
        for (uint32_t tick = 0; tick < num_readers; tick++) {
            SegReader *candidate = (SegReader*)VA_Fetch(seg_readers, tick);
            CharBuf   *candidate_name
                = Seg_Get_Name(SegReader_Get_Segment(candidate));
            if (!CB_Equals(seg, (Obj*)candidate_name)) {
                continue;
            }

            /* If the count hasn't changed, we're about to merge away the
             * most recent deletions file pointing at this target segment --
             * so force a new file to be written out. */
            int32_t count
                = (int32_t)Obj_To_I64(Hash_Fetch_Str(mini_meta, "count", 5));
            DeletionsReader *del_reader
                = (DeletionsReader*)SegReader_Obtain(
                      candidate, VTable_Get_Name(DELETIONSREADER));
            if (count == DelReader_Del_Count(del_reader)) {
                ivars->updated[tick] = true;
            }
            break;
        }
    }
}
// Initialize a DefaultDeletionsWriter.
//
// After the DataWriter initializer runs, the writer captures the
// PolyReader's segment readers and offsets, creates an IndexSearcher over
// the PolyReader, and for each segment builds a BitVector of its existing
// deletions plus a segment-name -> tick entry in name_to_tick.
// Returns `self`.
DefaultDeletionsWriter* DefDelWriter_init(DefaultDeletionsWriter *self, Schema *schema, Snapshot *snapshot, Segment *segment, PolyReader *polyreader) {
    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);

    ivars->seg_readers = PolyReader_Seg_Readers(polyreader);
    uint32_t num_seg_readers = VA_Get_Size(ivars->seg_readers);
    ivars->seg_starts   = PolyReader_Offsets(polyreader);
    ivars->bit_vecs     = VA_new(num_seg_readers);
    ivars->updated      = (bool*)CALLOCATE(num_seg_readers, sizeof(bool));
    ivars->searcher     = IxSearcher_new((Obj*)polyreader);
    ivars->name_to_tick = Hash_new(num_seg_readers);

    // Materialize a BitVector of deletions for each segment.
    for (uint32_t tick = 0; tick < num_seg_readers; tick++) {
        SegReader *seg_reader
            = (SegReader*)VA_Fetch(ivars->seg_readers, tick);
        BitVector *bit_vec = BitVec_new(SegReader_Doc_Max(seg_reader));
        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Fetch(
                  seg_reader, VTable_Get_Name(DELETIONSREADER));

        Matcher *seg_dels = NULL;
        if (del_reader) {
            seg_dels = DelReader_Iterator(del_reader);
        }
        if (seg_dels) {
            // Matcher_Next() yields 0 once the deletions are exhausted.
            for (int32_t del = Matcher_Next(seg_dels);
                 del != 0;
                 del = Matcher_Next(seg_dels)
                ) {
                BitVec_Set(bit_vec, del);
            }
            DECREF(seg_dels);
        }

        VA_Store(ivars->bit_vecs, tick, (Obj*)bit_vec);
        Hash_Store(ivars->name_to_tick,
                   (Obj*)SegReader_Get_Seg_Name(seg_reader),
                   (Obj*)Int32_new(tick));
    }

    return self;
}
// Copy the raw highlight records of an existing segment into this writer.
//
// Empty segments are ignored.  For every doc id from 1 to the segment's
// doc_max, the record is copied unless `doc_map` is non-NULL and maps the
// doc to 0.  Each copied record is preceded by writing the data stream's
// current position to the index stream.
void HLWriter_add_segment(HighlightWriter *self, SegReader *reader, I32Array *doc_map) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);
    int32_t doc_max = SegReader_Doc_Max(reader);

    // Bail if the supplied segment is empty.
    if (doc_max == 0) {
        return;
    }

    DefaultHighlightReader *hl_reader
        = (DefaultHighlightReader*)CERTIFY(
              SegReader_Obtain(reader, VTable_Get_Name(HIGHLIGHTREADER)),
              DEFAULTHIGHLIGHTREADER);
    OutStream *dat_out = S_lazy_init(self);
    OutStream *ix_out  = ivars->ix_out;
    ByteBuf   *record  = BB_new(0);

    for (int32_t doc_id = 1; doc_id <= doc_max; doc_id++) {
        // Keep the doc when there is no map, or the map entry is non-zero.
        if (!doc_map || I32Arr_Get(doc_map, doc_id)) {
            // Write file pointer.
            OutStream_Write_I64(ix_out, OutStream_Tell(dat_out));

            // Copy the raw record.
            DefHLReader_Read_Record(hl_reader, doc_id, record);
            OutStream_Write_Bytes(dat_out, BB_Get_Buf(record),
                                  BB_Get_Size(record));
            BB_Set_Size(record, 0);
        }
    }

    DECREF(record);
}
// Copy the raw document records of an existing segment into this writer.
//
// Empty segments are ignored.  For every doc id from 1 to the segment's
// doc_max, the record is copied to the data stream and its starting
// position is written to the index stream.  Docs that `doc_map` maps to 0
// are skipped.  A NULL `doc_map` means every record is copied, matching how
// HLWriter_add_segment() treats a missing map.
void DocWriter_Add_Segment_IMP(DocWriter *self, SegReader *reader, I32Array *doc_map) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    int32_t doc_max = SegReader_Doc_Max(reader);

    // Bail if the supplied segment is empty.
    if (doc_max == 0) {
        return;
    }

    OutStream *const dat_out = S_lazy_init(self);
    OutStream *const ix_out  = ivars->ix_out;
    ByteBuf   *const buffer  = BB_new(0);
    DefaultDocReader *const doc_reader
        = (DefaultDocReader*)CERTIFY(
              SegReader_Obtain(reader, VTable_Get_Name(DOCREADER)),
              DEFAULTDOCREADER);

    // doc_max was already fetched above, so reuse it as the loop bound.
    for (int32_t i = 1; i <= doc_max; i++) {
        // Skip deleted docs.  Without a map nothing counts as deleted.
        if (doc_map && !I32Arr_Get(doc_map, i)) {
            continue;
        }

        int64_t start = OutStream_Tell(dat_out);

        // Copy record over.
        DefDocReader_Read_Record(doc_reader, buffer, i);
        char   *buf  = BB_Get_Buf(buffer);
        size_t  size = BB_Get_Size(buffer);
        OutStream_Write_Bytes(dat_out, buf, size);

        // Write file pointer.
        OutStream_Write_I64(ix_out, start);
    }

    DECREF(buffer);
}
// Return the name held by the object's vtable.
CharBuf* Obj_get_class_name(Obj *self) {
    CharBuf *class_name = VTable_Get_Name(self->vtable);
    return class_name;
}
// Return the name held by the object's vtable.
String* Obj_Get_Class_Name_IMP(Obj *self) {
    String *class_name = VTable_Get_Name(self->vtable);
    return class_name;
}