// Absorb one segment's postings for this pool's field.  If the segment
// has a Lexicon for the field, wrap the segment's Lexicon/PostingList in
// a child PostingPool "run" and register it for the external sort.
void PostPool_Add_Segment_IMP(PostingPool *self, SegReader *reader, I32Array *doc_map, int32_t doc_base) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    LexiconReader *lex_reader = (LexiconReader*)SegReader_Fetch(
                                    reader, Class_Get_Name(LEXICONREADER));
    Lexicon *lexicon = lex_reader
                       ? LexReader_Lexicon(lex_reader, ivars->field, NULL)
                       : NULL;

    if (lexicon) {
        // A Lexicon without a matching PostingList means the index is
        // corrupt or the reader is misconfigured.
        PostingListReader *plist_reader
            = (PostingListReader*)SegReader_Fetch(
                  reader, Class_Get_Name(POSTINGLISTREADER));
        PostingList *plist
            = plist_reader
              ? PListReader_Posting_List(plist_reader, ivars->field, NULL)
              : NULL;
        if (!plist) {
            THROW(ERR, "Got a Lexicon but no PostingList for '%o' in '%o'",
                  ivars->field, SegReader_Get_Seg_Name(reader));
        }

        // Create a child pool seeded with the segment's lexicon/postings.
        // Ownership note: the refs returned by LexReader_Lexicon and
        // PListReader_Posting_List are handed to the run's ivars; only
        // doc_map needs an extra INCREF.
        PostingPool *run
            = PostPool_new(ivars->schema, ivars->snapshot, ivars->segment,
                           ivars->polyreader, ivars->field, ivars->lex_writer,
                           ivars->mem_pool, ivars->lex_temp_out,
                           ivars->post_temp_out, ivars->skip_out);
        PostingPoolIVARS *const run_ivars = PostPool_IVARS(run);
        run_ivars->lexicon  = lexicon;
        run_ivars->plist    = plist;
        run_ivars->doc_base = doc_base;
        run_ivars->doc_map  = (I32Array*)INCREF(doc_map);
        PostPool_Add_Run(self, (SortExternal*)run);
    }
}
// Confirm that `obj` is a non-NULL instance of `klass`, throwing with
// file/line/function context on failure.  Returns `obj` unchanged.
Obj* Err_certify(Obj *obj, Class *klass, const char *file, int line, const char *func) {
    if (obj) {
        if (SI_obj_is_a(obj, klass)) {
            return obj;
        }
        Err_throw_at(ERR, file, line, func, "Can't downcast from %o to %o",
                     Obj_get_class_name(obj), Class_Get_Name(klass));
    }
    else {
        Err_throw_at(ERR, file, line, func, "Object isn't a %o, it's NULL",
                     Class_Get_Name(klass));
    }
    return obj;
}
// Produce a RangeMatcher for this segment, or NULL when the segment has
// no sort cache for the field or the range falls outside the cache.
Matcher* RangeCompiler_Make_Matcher_IMP(RangeCompiler *self, SegReader *reader, bool need_score) {
    UNUSED_VAR(need_score);

    RangeQuery *parent = (RangeQuery*)RangeCompiler_IVARS(self)->parent;
    String *field = RangeQuery_IVARS(parent)->field;

    // Locate this segment's sort cache for the queried field, if any.
    SortReader *sort_reader
        = (SortReader*)SegReader_Fetch(reader, Class_Get_Name(SORTREADER));
    SortCache *sort_cache
        = sort_reader ? SortReader_Fetch_Sort_Cache(sort_reader, field)
                      : NULL;
    if (sort_cache == NULL) { return NULL; }

    // Translate the query's endpoints into ordinals within the cache.
    int32_t lower   = S_find_lower_bound(self, sort_cache);
    int32_t upper   = S_find_upper_bound(self, sort_cache);
    int32_t max_ord = SortCache_Get_Cardinality(sort_cache) + 1;
    if (lower > max_ord || upper < 0) { return NULL; }

    return (Matcher*)RangeMatcher_new(lower, upper, sort_cache,
                                      SegReader_Doc_Max(reader));
}
// Absorb an incoming segment's sort caches, field by field.
// Fix: the SegReader_Fetch of the SortReader does not depend on the
// field, so it is hoisted out of the per-field loop instead of being
// repeated on every iteration.
void SortWriter_Add_Segment_IMP(SortWriter *self, SegReader *reader, I32Array *doc_map) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    Vector *fields = Schema_All_Fields(ivars->schema);

    // The segment's SortReader is the same for every field; fetch once.
    SortReader *sort_reader = (SortReader*)SegReader_Fetch(
                                  reader, Class_Get_Name(SORTREADER));

    // Proceed field-at-a-time, rather than doc-at-a-time.
    for (size_t i = 0, max = Vec_Get_Size(fields); i < max; i++) {
        String *field = (String*)Vec_Fetch(fields, i);
        SortCache *cache = sort_reader
                           ? SortReader_Fetch_Sort_Cache(sort_reader, field)
                           : NULL;
        if (cache) {
            int32_t field_num = Seg_Field_Num(ivars->segment, field);
            SortFieldWriter *field_writer
                = S_lazy_init_field_writer(self, field_num);
            SortFieldWriter_Add_Segment(field_writer, reader, doc_map, cache);
            // Remember that there is buffered data to flush at Finish time.
            ivars->flush_at_finish = true;
        }
    }

    DECREF(fields);
}
// Prepare the collector for a new segment: reset the bubble/threshold
// state, look up the segment's sort caches, and derive the per-rule
// comparison actions before delegating to the superclass.
void SortColl_Set_Reader_IMP(SortCollector *self, SegReader *reader) {
    SortCollectorIVARS *const ivars = SortColl_IVARS(self);

    SortReader *sort_reader
        = (SortReader*)SegReader_Fetch(reader, Class_Get_Name(SORTREADER));

    // Reset threshold variables and trigger auto-action behavior.
    // When scoring, seed thresholds with -inf so any real score beats
    // them; otherwise use NaN to mark scores as unused.
    MatchDocIVARS *const bumped_ivars = MatchDoc_IVARS(ivars->bumped);
    bumped_ivars->doc_id = INT32_MAX;
    ivars->bubble_doc = INT32_MAX;
    bumped_ivars->score = ivars->need_score ? CHY_F32_NEGINF : CHY_F32_NAN;
    ivars->bubble_score = ivars->need_score ? CHY_F32_NEGINF : CHY_F32_NAN;
    ivars->actions = ivars->auto_actions;

    // Obtain sort caches.  Derive actions array for this segment.
    if (ivars->need_values && sort_reader) {
        for (uint32_t i = 0, max = ivars->num_rules; i < max; i++) {
            SortRule *rule = (SortRule*)Vec_Fetch(ivars->rules, i);
            String *field = SortRule_Get_Field(rule);
            SortCache *cache
                = field ? SortReader_Fetch_Sort_Cache(sort_reader, field)
                        : NULL;
            ivars->sort_caches[i] = cache;
            ivars->derived_actions[i] = S_derive_action(rule, cache);
            // Cache the ordinal array for fast per-doc comparisons.
            if (cache) { ivars->ord_arrays[i] = SortCache_Get_Ords(cache); }
            else       { ivars->ord_arrays[i] = NULL; }
        }
    }
    ivars->seg_doc_max = reader ? SegReader_Doc_Max(reader) : 0;

    // Chain up to the superclass implementation.
    SortColl_Set_Reader_t super_set_reader
        = (SortColl_Set_Reader_t)SUPER_METHOD_PTR(SORTCOLLECTOR,
                                                  LUCY_SortColl_Set_Reader);
    super_set_reader(self, reader);
}
// Return a Matcher which iterates the deletions for `seg_reader`, or
// NULL when the segment has no deletions at all.  Throws if the
// SegReader is not one of the readers this writer was built from.
Matcher* DefDelWriter_Seg_Deletions_IMP(DefaultDeletionsWriter *self, SegReader *seg_reader) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Matcher *deletions = NULL;
    Segment *segment = SegReader_Get_Segment(seg_reader);
    String *seg_name = Seg_Get_Name(segment);

    // Map the segment name to its slot in our parallel arrays
    // (seg_readers / bit_vecs / updated).
    Integer32 *tick_obj = (Integer32*)Hash_Fetch(ivars->name_to_tick,
                                                 (Obj*)seg_name);
    int32_t tick = tick_obj ? Int32_Get_Value(tick_obj) : 0;
    SegReader *candidate = tick_obj
                           ? (SegReader*)VA_Fetch(ivars->seg_readers, tick)
                           : NULL;

    if (tick_obj) {
        DeletionsReader *del_reader = (DeletionsReader*)SegReader_Obtain(
                                          candidate,
                                          Class_Get_Name(DELETIONSREADER));
        // Supply a Matcher only if there is at least one deletion, either
        // applied during this session (updated[tick]) or already on disk.
        if (ivars->updated[tick] || DelReader_Del_Count(del_reader)) {
            BitVector *deldocs = (BitVector*)VA_Fetch(ivars->bit_vecs, tick);
            deletions = (Matcher*)BitVecMatcher_new(deldocs);
        }
    }
    else { // Sanity check.
        THROW(ERR, "Couldn't find SegReader %o", seg_reader);
    }

    return deletions;
}
// Mark as deleted every document matching `term` in `field`, across all
// segments.  A segment's `updated` flag is raised only when at least one
// bit actually flips.
void DefDelWriter_Delete_By_Term_IMP(DefaultDeletionsWriter *self, String *field, Obj *term) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    uint32_t num_seg_readers = VA_Get_Size(ivars->seg_readers);

    for (uint32_t tick = 0; tick < num_seg_readers; tick++) {
        SegReader *seg_reader
            = (SegReader*)VA_Fetch(ivars->seg_readers, tick);
        PostingListReader *plist_reader
            = (PostingListReader*)SegReader_Fetch(
                  seg_reader, Class_Get_Name(POSTINGLISTREADER));
        if (plist_reader == NULL) { continue; }

        PostingList *plist
            = PListReader_Posting_List(plist_reader, field, term);
        if (plist == NULL) { continue; }

        // Iterate through postings, marking each doc as deleted and
        // counting how many were newly zapped.
        BitVector *bit_vec = (BitVector*)VA_Fetch(ivars->bit_vecs, tick);
        int32_t num_zapped = 0;
        int32_t doc_id;
        while (0 != (doc_id = PList_Next(plist))) {
            num_zapped += !BitVec_Get(bit_vec, doc_id);
            BitVec_Set(bit_vec, doc_id);
        }
        if (num_zapped) { ivars->updated[tick] = true; }

        DECREF(plist);
    }
}
// Deserialize `dump` by dispatching to the Load method appropriate for
// `klass`.  A throwaway instance is created purely so Obj_is_a can probe
// the class hierarchy; it is released on every path.  Throws if no Load
// dispatch exists for the class.
static Obj* S_load_via_load_method(Class *klass, Obj *dump) {
    Obj *probe  = Class_Make_Obj(klass);
    Obj *result = NULL;

    // Test the more specific classes before the more general ones.
    if (Obj_is_a(probe, ANALYZER)) {
        result = Analyzer_Load((Analyzer*)probe, dump);
    }
    else if (Obj_is_a(probe, DOC)) {
        result = (Obj*)Doc_Load((Doc*)probe, dump);
    }
    else if (Obj_is_a(probe, SIMILARITY)) {
        result = (Obj*)Sim_Load((Similarity*)probe, dump);
    }
    else if (Obj_is_a(probe, FIELDTYPE)) {
        result = FType_Load((FieldType*)probe, dump);
    }
    else if (Obj_is_a(probe, SCHEMA)) {
        result = (Obj*)Schema_Load((Schema*)probe, dump);
    }
    else if (Obj_is_a(probe, QUERY)) {
        result = Query_Load((Query*)probe, dump);
    }
    else {
        DECREF(probe);
        THROW(ERR, "Don't know how to load '%o'", Class_Get_Name(klass));
    }

    DECREF(probe);
    return result;
}
// Produce a Matcher over this segment's postings for the parent
// TermQuery's term, or NULL when the term has no postings here.
Matcher* TermCompiler_Make_Matcher_IMP(TermCompiler *self, SegReader *reader, bool need_score) {
    TermCompilerIVARS *const ivars = TermCompiler_IVARS(self);
    TermQueryIVARS *const parent_ivars
        = TermQuery_IVARS((TermQuery*)ivars->parent);

    // Look up the term's posting list within this segment.
    PostingListReader *plist_reader
        = (PostingListReader*)SegReader_Fetch(
              reader, Class_Get_Name(POSTINGLISTREADER));
    PostingList *plist = NULL;
    if (plist_reader) {
        plist = PListReader_Posting_List(plist_reader, parent_ivars->field,
                                         parent_ivars->term);
    }

    // An absent or empty posting list means no matches in this segment.
    Matcher *matcher = NULL;
    if (plist != NULL && PList_Get_Doc_Freq(plist) != 0) {
        matcher = PList_Make_Matcher(plist, ivars->sim, (Compiler*)self,
                                     need_score);
    }
    DECREF(plist);
    return matcher;
}
// Initialize a SegReader over the segment at `seg_tick` within
// `segments`.  Component initialization runs inside an error trap so a
// failure tears down the half-built reader before rethrowing.
SegReader* SegReader_init(SegReader *self, Schema *schema, Folder *folder, Snapshot *snapshot, Vector *segments, int32_t seg_tick) {
    Segment *segment;

    IxReader_init((IndexReader*)self, schema, folder, snapshot, segments,
                  seg_tick, NULL);
    SegReaderIVARS *const ivars = SegReader_IVARS(self);
    segment = SegReader_Get_Segment(self);

    // Cache per-segment basics.
    ivars->doc_max  = (int32_t)Seg_Get_Count(segment);
    ivars->seg_name = (String*)INCREF(Seg_Get_Name(segment));
    ivars->seg_num  = Seg_Get_Number(segment);

    Err *error = Err_trap(S_try_init_components, self);
    if (error) {
        // An error occurred, so clean up self and rethrow the exception.
        DECREF(self);
        RETHROW(error);
    }

    // Cache the deletion count from the DeletionsReader component, if one
    // was installed; otherwise assume zero deletions.
    DeletionsReader *del_reader = (DeletionsReader*)Hash_Fetch(
                                      ivars->components,
                                      Class_Get_Name(DELETIONSREADER));
    ivars->del_count = del_reader ? DelReader_Del_Count(del_reader) : 0;

    return self;
}
// Absorb an existing index into the one being written.  `index` may be a
// Folder or a String path.  The other index's fields are merged into the
// schema and segment, then each of its segments is added with a doc map
// that compacts away deleted docs.
void Indexer_Add_Index_IMP(Indexer *self, Obj *index) {
    IndexerIVARS *const ivars = Indexer_IVARS(self);
    Folder *other_folder = NULL;
    IndexReader *reader = NULL;

    // Accept either a Folder or a filesystem path.
    if (Obj_is_a(index, FOLDER)) {
        other_folder = (Folder*)INCREF(index);
    }
    else if (Obj_is_a(index, STRING)) {
        other_folder = (Folder*)FSFolder_new((String*)index);
    }
    else {
        THROW(ERR, "Invalid type for 'index': %o", Obj_get_class_name(index));
    }

    reader = IxReader_open((Obj*)other_folder, NULL, NULL);
    if (reader == NULL) {
        THROW(ERR, "Index doesn't seem to contain any data");
    }
    else {
        Schema *schema = ivars->schema;
        Schema *other_schema = IxReader_Get_Schema(reader);
        Vector *other_fields = Schema_All_Fields(other_schema);
        Vector *seg_readers = IxReader_Seg_Readers(reader);

        // Validate schema compatibility and add fields.
        Schema_Eat(schema, other_schema);

        // Add fields to Segment.
        for (size_t i = 0, max = Vec_Get_Size(other_fields); i < max; i++) {
            String *other_field = (String*)Vec_Fetch(other_fields, i);
            Seg_Add_Field(ivars->segment, other_field);
        }
        DECREF(other_fields);

        // Add all segments.
        for (size_t i = 0, max = Vec_Get_Size(seg_readers); i < max; i++) {
            SegReader *seg_reader = (SegReader*)Vec_Fetch(seg_readers, i);
            DeletionsReader *del_reader
                = (DeletionsReader*)SegReader_Fetch(
                      seg_reader, Class_Get_Name(DELETIONSREADER));
            // Build a doc map so deleted docs vanish from the new segment.
            Matcher *deletions = del_reader
                                 ? DelReader_Iterator(del_reader)
                                 : NULL;
            I32Array *doc_map = DelWriter_Generate_Doc_Map(
                                    ivars->del_writer, deletions,
                                    SegReader_Doc_Max(seg_reader),
                                    (int32_t)Seg_Get_Count(ivars->segment));
            SegWriter_Add_Segment(ivars->seg_writer, seg_reader, doc_map);
            DECREF(deletions);
            DECREF(doc_map);
        }
        DECREF(seg_readers);
    }

    DECREF(reader);
    DECREF(other_folder);
}
// Downcast helper: NULL passes through untouched; a live object of the
// wrong class triggers an exception carrying file/line/function context.
// Returns `obj` unchanged.
Obj* Err_downcast(Obj *obj, Class *klass, const char *file, int line, const char *func) {
    if (obj == NULL || SI_obj_is_a(obj, klass)) {
        return obj;
    }
    Err_throw_at(ERR, file, line, func, "Can't downcast from %o to %o",
                 Obj_get_class_name(obj), Class_Get_Name(klass));
    return obj;
}
// Exercise Is_A, Get_Class, and Get_Class_Name via an empty String.
static void test_Is_A(TestBatchRunner *runner) {
    String *string = Str_new_from_trusted_utf8("", 0);
    Class *str_class = Str_Get_Class(string);
    String *class_name = Str_Get_Class_Name(string);

    // Is_A must succeed for the exact class and for an ancestor class.
    TEST_TRUE(runner, Str_Is_A(string, STRING), "String Is_A String.");
    TEST_TRUE(runner, Str_Is_A(string, OBJ), "String Is_A Obj.");

    // Class identity and name round-trip.
    TEST_TRUE(runner, str_class == STRING, "Get_Class");
    TEST_TRUE(runner, Str_Equals(Class_Get_Name(STRING), (Obj*)class_name),
              "Get_Class_Name");

    DECREF(string);
}
// Build a PhraseMatcher over this segment, or return NULL when a
// precondition fails: no terms, posting format without positions, no
// PostingListReader, or any term absent from the segment.
Matcher* PhraseCompiler_Make_Matcher_IMP(PhraseCompiler *self, SegReader *reader, bool need_score) {
    UNUSED_VAR(need_score);
    PhraseCompilerIVARS *const ivars = PhraseCompiler_IVARS(self);
    PhraseQueryIVARS *const parent_ivars
        = PhraseQuery_IVARS((PhraseQuery*)ivars->parent);
    Vector *const terms = parent_ivars->terms;
    uint32_t num_terms = Vec_Get_Size(terms);

    // Bail if there are no terms.
    if (!num_terms) { return NULL; }

    // Bail unless field is valid and posting type supports positions.
    // (A trial Posting is created only to probe its type.)
    Similarity *sim = PhraseCompiler_Get_Similarity(self);
    Posting *posting = Sim_Make_Posting(sim);
    if (posting == NULL || !Obj_is_a((Obj*)posting, SCOREPOSTING)) {
        DECREF(posting);
        return NULL;
    }
    DECREF(posting);

    // Bail if there's no PostingListReader for this segment.
    PostingListReader *const plist_reader
        = (PostingListReader*)SegReader_Fetch(
              reader, Class_Get_Name(POSTINGLISTREADER));
    if (!plist_reader) { return NULL; }

    // Look up each term.
    Vector *plists = Vec_new(num_terms);
    for (uint32_t i = 0; i < num_terms; i++) {
        Obj *term = Vec_Fetch(terms, i);
        PostingList *plist
            = PListReader_Posting_List(plist_reader, parent_ivars->field,
                                       term);

        // Bail if any one of the terms isn't in the index.  A phrase can
        // only match when every term is present.
        if (!plist || !PList_Get_Doc_Freq(plist)) {
            DECREF(plist);
            DECREF(plists);
            return NULL;
        }
        Vec_Push(plists, (Obj*)plist);
    }

    Matcher *retval
        = (Matcher*)PhraseMatcher_new(sim, plists, (Compiler*)self);
    DECREF(plists);
    return retval;
}
// Absorb a segment being merged away.  If it carried deletions metadata,
// make sure those deletions are not silently lost: when the most recent
// deletions file pointing at a still-live target segment is about to be
// merged away, flag that target so a fresh file gets written.
void DefDelWriter_Merge_Segment_IMP(DefaultDeletionsWriter *self, SegReader *reader, I32Array *doc_map) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    UNUSED_VAR(doc_map);
    Segment *segment = SegReader_Get_Segment(reader);
    Hash *del_meta = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "deletions", 9);

    if (del_meta) {
        Vector *seg_readers = ivars->seg_readers;
        Hash *files = (Hash*)Hash_Fetch_Utf8(del_meta, "files", 5);
        if (files) {
            HashIterator *iter = HashIter_new(files);
            while (HashIter_Next(iter)) {
                String *seg = HashIter_Get_Key(iter);
                Hash *mini_meta = (Hash*)HashIter_Get_Value(iter);

                /* Find the segment the deletions from the SegReader
                 * we're adding correspond to.  If it's gone, we don't
                 * need to worry about losing deletions files that point
                 * at it. */
                for (size_t i = 0, max = Vec_Get_Size(seg_readers);
                     i < max; i++
                    ) {
                    SegReader *candidate
                        = (SegReader*)Vec_Fetch(seg_readers, i);
                    String *candidate_name
                        = Seg_Get_Name(SegReader_Get_Segment(candidate));
                    if (Str_Equals(seg, (Obj*)candidate_name)) {
                        /* If the count hasn't changed, we're about to
                         * merge away the most recent deletions file
                         * pointing at this target segment -- so force a
                         * new file to be written out. */
                        int32_t count = (int32_t)Json_obj_to_i64(
                            Hash_Fetch_Utf8(mini_meta, "count", 5));
                        DeletionsReader *del_reader
                            = (DeletionsReader*)SegReader_Obtain(
                                  candidate,
                                  Class_Get_Name(DELETIONSREADER));
                        if (count == DelReader_Del_Count(del_reader)) {
                            ivars->updated[i] = true;
                        }
                        break;
                    }
                }
            }
            DECREF(iter);
        }
    }
}
// Initialize the writer against the current PolyReader: record its
// SegReaders, then materialize each segment's on-disk deletions into a
// BitVector so new deletions can be OR'd in during this session.  The
// name_to_tick hash maps each segment name to its slot in the parallel
// seg_readers / bit_vecs / updated arrays.
DefaultDeletionsWriter* DefDelWriter_init(DefaultDeletionsWriter *self, Schema *schema, Snapshot *snapshot, Segment *segment, PolyReader *polyreader) {
    DataWriter_init((DataWriter*)self, schema, snapshot, segment,
                    polyreader);
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    ivars->seg_readers = PolyReader_Seg_Readers(polyreader);
    uint32_t num_seg_readers = VA_Get_Size(ivars->seg_readers);
    ivars->seg_starts = PolyReader_Offsets(polyreader);
    ivars->bit_vecs = VA_new(num_seg_readers);
    // `updated` tracks, per segment, whether this session changed its bits.
    ivars->updated = (bool*)CALLOCATE(num_seg_readers, sizeof(bool));
    ivars->searcher = IxSearcher_new((Obj*)polyreader);
    ivars->name_to_tick = Hash_new(num_seg_readers);

    // Materialize a BitVector of deletions for each segment.
    for (uint32_t i = 0; i < num_seg_readers; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(ivars->seg_readers, i);
        BitVector *bit_vec = BitVec_new(SegReader_Doc_Max(seg_reader));
        DeletionsReader *del_reader = (DeletionsReader*)SegReader_Fetch(
                                          seg_reader,
                                          Class_Get_Name(DELETIONSREADER));
        Matcher *seg_dels = del_reader
                            ? DelReader_Iterator(del_reader)
                            : NULL;
        if (seg_dels) {
            int32_t del;
            while (0 != (del = Matcher_Next(seg_dels))) {
                BitVec_Set(bit_vec, del);
            }
            DECREF(seg_dels);
        }
        VA_Store(ivars->bit_vecs, i, (Obj*)bit_vec);
        Hash_Store(ivars->name_to_tick,
                   (Obj*)SegReader_Get_Seg_Name(seg_reader),
                   (Obj*)Int32_new(i));
    }

    return self;
}
// Absorb an incoming segment's highlight data: copy each surviving doc's
// raw record into the consolidated data file, recording its start offset
// in the index file.  A NULL `doc_map` means no docs were deleted.
void HLWriter_Add_Segment_IMP(HighlightWriter *self, SegReader *reader, I32Array *doc_map) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);
    int32_t doc_max = SegReader_Doc_Max(reader);

    // An empty segment contributes nothing.
    if (doc_max == 0) { return; }

    DefaultHighlightReader *hl_reader = (DefaultHighlightReader*)CERTIFY(
        SegReader_Obtain(reader, Class_Get_Name(HIGHLIGHTREADER)),
        DEFAULTHIGHLIGHTREADER);
    OutStream *dat_out = S_lazy_init(self);
    OutStream *ix_out  = ivars->ix_out;
    ByteBuf   *record  = BB_new(0);

    for (int32_t doc_id = 1; doc_id <= doc_max; doc_id++) {
        // Docs mapped to zero were deleted; leave them behind.
        if (doc_map && !I32Arr_Get(doc_map, doc_id)) { continue; }

        // Record where this doc's data begins, then transfer the raw
        // record and reset the scratch buffer for the next doc.
        OutStream_Write_I64(ix_out, OutStream_Tell(dat_out));
        DefHLReader_Read_Record(hl_reader, doc_id, record);
        OutStream_Write_Bytes(dat_out, BB_Get_Buf(record),
                              BB_Get_Size(record));
        BB_Set_Size(record, 0);
    }

    DECREF(record);
}
// Create a bare-bones subclass of `parent` with the supplied `name`.
// The subclass inherits the parent's flags, allocation sizes, and entire
// method vtable.  Throws if `parent` is marked final.
static Class* S_simple_subclass(Class *parent, String *name) {
    if (parent->flags & CFISH_fFINAL) {
        THROW(ERR, "Can't subclass final class %o", Class_Get_Name(parent));
    }

    // Allocate a zeroed Class object the same size as the parent's.
    Class *subclass
        = (Class*)Memory_wrapped_calloc(parent->class_alloc_size, 1);
    Class_Init_Obj(parent->klass, subclass);

    subclass->parent = parent;
    subclass->flags = parent->flags;
    subclass->obj_alloc_size = parent->obj_alloc_size;
    subclass->class_alloc_size = parent->class_alloc_size;
    subclass->methods = (Method**)CALLOCATE(1, sizeof(Method*));
    S_set_name(subclass, Str_Get_Ptr8(name), Str_Get_Size(name));

    // Copy the parent's method pointers wholesale: everything from the
    // `vtable` member to the end of the class allocation.
    memcpy(subclass->vtable, parent->vtable,
           parent->class_alloc_size - offsetof(Class, vtable));

    return subclass;
}
// Absorb an incoming segment's stored documents, copying each surviving
// doc's raw record and recording its start offset in the index file.
// Fixes: (1) `doc_map` is now NULL-checked before I32Arr_Get, matching
// the convention used by HLWriter_Add_Segment_IMP in this codebase,
// where a NULL map means "no deletions"; (2) the already-computed
// `doc_max` is reused instead of calling SegReader_Doc_Max a second time
// in the loop header.
void DocWriter_Add_Segment_IMP(DocWriter *self, SegReader *reader, I32Array *doc_map) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    int32_t doc_max = SegReader_Doc_Max(reader);

    if (doc_max == 0) {
        // Bail if the supplied segment is empty.
        return;
    }

    OutStream *const dat_out = S_lazy_init(self);
    OutStream *const ix_out  = ivars->ix_out;
    ByteBuf *const buffer    = BB_new(0);
    DefaultDocReader *const doc_reader = (DefaultDocReader*)CERTIFY(
        SegReader_Obtain(reader, Class_Get_Name(DOCREADER)),
        DEFAULTDOCREADER);

    for (int32_t i = 1; i <= doc_max; i++) {
        // With no doc map, every doc survives; otherwise a zero entry
        // marks a deleted doc to skip.
        if (doc_map == NULL || I32Arr_Get(doc_map, (size_t)i)) {
            int64_t start = OutStream_Tell(dat_out);

            // Copy record over.
            DefDocReader_Read_Record(doc_reader, buffer, i);
            const char *buf = BB_Get_Buf(buffer);
            size_t size = BB_Get_Size(buffer);
            OutStream_Write_Bytes(dat_out, buf, size);

            // Write file pointer.
            OutStream_Write_I64(ix_out, start);
        }
    }

    DECREF(buffer);
}
// Throw reporting an abstract method invocation.  Prefers the concrete
// object's class name; falls back to the Class the method was declared
// on when no instance is available.
void Err_abstract_method_call(Obj *obj, Class *klass, const char *method_name) {
    String *class_name;
    if (obj) { class_name = Obj_get_class_name(obj); }
    else     { class_name = Class_Get_Name(klass); }
    THROW(ERR, "Abstract method '%s' not defined by %o", method_name,
          class_name);
}
// After a background merge commits, check whether fresh deletions were
// applied against the segments that were merged away while the merge was
// running.  If so, remap those deletions through the merge's doc maps
// and write them into the rewritten segment.  Returns true if any
// deletions had to be carried forward, false otherwise.
static bool S_merge_updated_deletions(BackgroundMerger *self) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);
    Hash *updated_deletions = NULL;

    PolyReader *new_polyreader
        = PolyReader_open((Obj*)ivars->folder, NULL, NULL);
    Vector *new_seg_readers
        = PolyReader_Get_Seg_Readers(new_polyreader);
    Vector *old_seg_readers
        = PolyReader_Get_Seg_Readers(ivars->polyreader);
    Hash *new_segs = Hash_new(Vec_Get_Size(new_seg_readers));

    // Index the freshly-opened segment set by name.
    for (uint32_t i = 0, max = Vec_Get_Size(new_seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(new_seg_readers, i);
        String *seg_name = SegReader_Get_Seg_Name(seg_reader);
        Hash_Store(new_segs, seg_name, INCREF(seg_reader));
    }

    // Compare old vs. new deletion counts for each merged-away segment.
    for (uint32_t i = 0, max = Vec_Get_Size(old_seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(old_seg_readers, i);
        String *seg_name = SegReader_Get_Seg_Name(seg_reader);

        // If this segment was merged away...
        if (Hash_Fetch(ivars->doc_maps, seg_name)) {
            SegReader *new_seg_reader
                = (SegReader*)CERTIFY(
                      Hash_Fetch(new_segs, seg_name), SEGREADER);
            int32_t old_del_count = SegReader_Del_Count(seg_reader);
            int32_t new_del_count = SegReader_Del_Count(new_seg_reader);

            // ... were any new deletions applied against it?
            if (old_del_count != new_del_count) {
                DeletionsReader *del_reader
                    = (DeletionsReader*)SegReader_Obtain(
                          new_seg_reader,
                          Class_Get_Name(DELETIONSREADER));
                if (!updated_deletions) {
                    updated_deletions = Hash_new(max);
                }
                Hash_Store(updated_deletions, seg_name,
                           (Obj*)DelReader_Iterator(del_reader));
            }
        }
    }
    DECREF(new_polyreader);
    DECREF(new_segs);

    if (!updated_deletions) {
        return false;
    }
    else {
        // Open a writer over the merge-time snapshot so the carried-over
        // deletions target the rewritten segment.
        PolyReader *merge_polyreader
            = PolyReader_open((Obj*)ivars->folder, ivars->snapshot, NULL);
        Vector *merge_seg_readers
            = PolyReader_Get_Seg_Readers(merge_polyreader);
        Snapshot *latest_snapshot
            = Snapshot_Read_File(Snapshot_new(), ivars->folder, NULL);
        int64_t new_seg_num
            = IxManager_Highest_Seg_Num(ivars->manager, latest_snapshot)
              + 1;
        Segment *new_segment = Seg_new(new_seg_num);
        SegWriter *seg_writer = SegWriter_new(ivars->schema, ivars->snapshot,
                                              new_segment, merge_polyreader);
        DeletionsWriter *del_writer = SegWriter_Get_Del_Writer(seg_writer);
        int64_t merge_seg_num = Seg_Get_Number(ivars->segment);
        // INT32_MAX sentinels: "not found yet"; checked below.
        uint32_t seg_tick = INT32_MAX;
        int32_t offset = INT32_MAX;

        SegWriter_Prep_Seg_Dir(seg_writer);

        // Locate the rewritten segment's doc-id offset within the
        // merge-time reader.
        for (uint32_t i = 0, max = Vec_Get_Size(merge_seg_readers);
             i < max; i++
            ) {
            SegReader *seg_reader
                = (SegReader*)Vec_Fetch(merge_seg_readers, i);
            if (SegReader_Get_Seg_Num(seg_reader) == merge_seg_num) {
                I32Array *offsets = PolyReader_Offsets(merge_polyreader);
                seg_tick = i;
                offset = I32Arr_Get(offsets, seg_tick);
                DECREF(offsets);
            }
        }
        if (offset == INT32_MAX) { THROW(ERR, "Failed sanity check"); }

        // Carry each new deletion forward through its doc map.
        HashIterator *iter = HashIter_new(updated_deletions);
        while (HashIter_Next(iter)) {
            String *seg_name = HashIter_Get_Key(iter);
            Matcher *deletions = (Matcher*)HashIter_Get_Value(iter);
            I32Array *doc_map
                = (I32Array*)CERTIFY(
                      Hash_Fetch(ivars->doc_maps, seg_name), I32ARRAY);
            int32_t del;
            while (0 != (del = Matcher_Next(deletions))) {
                // Find the slot where the deleted doc resides in the
                // rewritten segment.  If the doc was already deleted when
                // we were merging, do nothing.
                int32_t remapped = I32Arr_Get(doc_map, del);
                if (remapped) {
                    // It's a new deletion, so carry it forward and zap it
                    // in the rewritten segment.
                    DelWriter_Delete_By_Doc_ID(del_writer,
                                               remapped + offset);
                }
            }
        }
        DECREF(iter);

        // Finish the segment and clean up.
        DelWriter_Finish(del_writer);
        SegWriter_Finish(seg_writer);
        DECREF(seg_writer);
        DECREF(new_segment);
        DECREF(latest_snapshot);
        DECREF(merge_polyreader);
        DECREF(updated_deletions);
    }

    return true;
}
// Seek the lexicon index to the entry closest to `target`: binary-search
// the on-disk index entries, record the resulting tick, then read that
// entry into the term stepper.  A NULL target (or an empty index) resets
// to the first entry.
void LexIndex_Seek_IMP(LexIndex *self, Obj *target) {
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    TermStepper *term_stepper = ivars->term_stepper;
    InStream *ix_in = ivars->ix_in;
    FieldType *type = ivars->field_type;
    int32_t lo = 0;
    int32_t hi = ivars->size - 1;
    int32_t result = -100;  // sentinel: no exact match found (yet)

    if (target == NULL || ivars->size == 0) {
        ivars->tick = 0;
        return;
    }
    else {
        if (!Obj_is_a(target, STRING)) {
            THROW(ERR, "Target is a %o, and not comparable to a %o",
                  Obj_get_class_name(target), Class_Get_Name(STRING));
        }
        /* TODO:
        Obj *first_obj = Vec_Fetch(terms, 0);
        if (!Obj_is_a(target, Obj_get_class(first_obj))) {
            THROW(ERR, "Target is a %o, and not comparable to a %o",
                  Obj_get_class_name(target),
                  Obj_get_class_name(first_obj));
        }
        */
    }

    // Divide and conquer.
    while (hi >= lo) {
        const int32_t mid = lo + ((hi - lo) / 2);
        // Each index slot stores a big-endian u64 file offset.
        const int64_t offset
            = (int64_t)NumUtil_decode_bigend_u64(ivars->offsets + mid);
        InStream_Seek(ix_in, offset);
        TermStepper_Read_Key_Frame(term_stepper, ix_in);

        // Compare values.  There is no need for a NULL-check because the
        // term number is always between 0 and ivars->size - 1.
        Obj *value = TermStepper_Get_Value(term_stepper);
        int32_t comparison = FType_Compare_Values(type, target, value);

        if (comparison < 0)      { hi = mid - 1; }
        else if (comparison > 0) { lo = mid + 1; }
        else                     { result = mid; break; }
    }

    // Record the index of the entry we've seeked to, then read entry.
    ivars->tick = hi == -1 // indicating that target lt first entry
                  ? 0
                  : result == -100 // if result is still -100, it wasn't set
                    ? hi
                    : result;
    S_read_entry(self);
}