/*
 * Add the postings of an existing segment to this pool as a new run.
 *
 * The segment's Lexicon and PostingList for this pool's field are looked up
 * through `reader`.  If the segment has no LexiconReader, or no Lexicon for
 * the field, the call is a no-op.  A Lexicon without a matching PostingList
 * is reported via THROW.
 *
 * The new run stores the Lexicon and PostingList pointers directly, records
 * `doc_base`, and holds its own reference to `doc_map`.
 */
void
PostPool_Add_Segment_IMP(PostingPool *self, SegReader *reader,
                         I32Array *doc_map, int32_t doc_base) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);

    // Without a Lexicon for our field there is nothing to add.
    LexiconReader *lexicon_reader
        = (LexiconReader*)SegReader_Fetch(reader,
                                          Class_Get_Name(LEXICONREADER));
    if (!lexicon_reader) {
        return;
    }
    Lexicon *seg_lexicon = LexReader_Lexicon(lexicon_reader, ivars->field,
                                             NULL);
    if (!seg_lexicon) {
        return;
    }

    // A Lexicon must always be accompanied by a PostingList.
    PostingListReader *postings_reader
        = (PostingListReader*)SegReader_Fetch(
              reader, Class_Get_Name(POSTINGLISTREADER));
    PostingList *seg_plist = NULL;
    if (postings_reader) {
        seg_plist = PListReader_Posting_List(postings_reader, ivars->field,
                                             NULL);
    }
    if (!seg_plist) {
        THROW(ERR, "Got a Lexicon but no PostingList for '%o' in '%o'",
              ivars->field, SegReader_Get_Seg_Name(reader));
    }

    // Build a run sharing this pool's configuration, then hand it the
    // segment-specific state.
    PostingPool *seg_run
        = PostPool_new(ivars->schema, ivars->snapshot, ivars->segment,
                       ivars->polyreader, ivars->field, ivars->lex_writer,
                       ivars->mem_pool, ivars->lex_temp_out,
                       ivars->post_temp_out, ivars->skip_out);
    PostingPoolIVARS *const seg_run_ivars = PostPool_IVARS(seg_run);
    seg_run_ivars->lexicon  = seg_lexicon;
    seg_run_ivars->plist    = seg_plist;
    seg_run_ivars->doc_base = doc_base;
    seg_run_ivars->doc_map  = (I32Array*)INCREF(doc_map);
    PostPool_Add_Run(self, (SortExternal*)seg_run);
}
/*
 * Mark every document matching `term` in `field` as deleted.
 *
 * Each segment reader is consulted in turn.  For every doc id produced by
 * the segment's PostingList, the corresponding bit is set in that segment's
 * deletions BitVector.  If at least one bit was not already set, the
 * segment's `updated` flag is raised.
 */
void
DefDelWriter_Delete_By_Term_IMP(DefaultDeletionsWriter *self, String *field,
                                Obj *term) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    const uint32_t num_segs = VA_Get_Size(ivars->seg_readers);

    for (uint32_t tick = 0; tick < num_segs; tick++) {
        SegReader *seg_reader
            = (SegReader*)VA_Fetch(ivars->seg_readers, tick);
        PostingListReader *postings_reader
            = (PostingListReader*)SegReader_Fetch(
                  seg_reader, Class_Get_Name(POSTINGLISTREADER));
        BitVector *deletions = (BitVector*)VA_Fetch(ivars->bit_vecs, tick);
        PostingList *postings = NULL;
        if (postings_reader) {
            postings = PListReader_Posting_List(postings_reader, field, term);
        }

        // No postings for this term in this segment.
        if (!postings) {
            continue;
        }

        // Walk the postings (a doc id of 0 ends iteration), flagging each
        // doc as deleted and noting whether anything actually changed.
        bool newly_deleted = false;
        for (int32_t doc_id = PList_Next(postings);
             doc_id != 0;
             doc_id = PList_Next(postings)
            ) {
            if (!BitVec_Get(deletions, doc_id)) {
                newly_deleted = true;
            }
            BitVec_Set(deletions, doc_id);
        }
        if (newly_deleted) {
            ivars->updated[tick] = true;
        }
        DECREF(postings);
    }
}
/*
 * Build a RangeMatcher for one segment.
 *
 * Returns NULL when the segment has no sort cache for the query's field, or
 * when the computed bounds fall entirely outside the cache's ordinals
 * (lower bound above cardinality + 1, or upper bound below zero).
 * `need_score` is ignored.
 */
Matcher*
RangeCompiler_Make_Matcher_IMP(RangeCompiler *self, SegReader *reader,
                               bool need_score) {
    RangeQuery *query  = (RangeQuery*)RangeCompiler_IVARS(self)->parent;
    String     *field  = RangeQuery_IVARS(query)->field;
    SortReader *sorter
        = (SortReader*)SegReader_Fetch(reader, Class_Get_Name(SORTREADER));
    SortCache  *cache  = NULL;
    if (sorter) {
        cache = SortReader_Fetch_Sort_Cache(sorter, field);
    }
    UNUSED_VAR(need_score);

    // The field is not sortable in this segment.
    if (!cache) {
        return NULL;
    }

    const int32_t lower_bound = S_find_lower_bound(self, cache);
    const int32_t upper_bound = S_find_upper_bound(self, cache);
    const int32_t ord_limit   = SortCache_Get_Cardinality(cache) + 1;

    // Only build a matcher when the range overlaps the available ordinals.
    if (lower_bound <= ord_limit && upper_bound >= 0) {
        const int32_t doc_max = SegReader_Doc_Max(reader);
        return (Matcher*)RangeMatcher_new(lower_bound, upper_bound, cache,
                                          doc_max);
    }
    return NULL;
}
/*
 * Point the collector at a new segment.
 *
 * Resets the "bumped"/"bubble" threshold state, restores the automatic
 * action list, and -- when sort values are needed and the segment provides
 * a SortReader -- refreshes the per-rule sort caches, derived actions and
 * ordinal arrays.  Finally records the segment's doc_max (0 when `reader`
 * is NULL) and forwards `reader` to the parent Collector.
 *
 * `reader` may be NULL: the function already guarded the doc_max lookup
 * against that, so the SortReader lookup is guarded the same way instead of
 * dereferencing a NULL reader first.
 */
void
SortColl_set_reader(SortCollector *self, SegReader *reader) {
    SortCollectorIVARS *const ivars = SortColl_IVARS(self);

    // Guard against a NULL reader, consistent with the seg_doc_max
    // computation at the end of this function.
    SortReader *sort_reader
        = reader
          ? (SortReader*)SegReader_Fetch(reader, VTable_Get_Name(SORTREADER))
          : NULL;

    // Reset threshold variables and trigger auto-action behavior.
    MatchDocIVARS *const bumped_ivars = MatchDoc_IVARS(ivars->bumped);
    bumped_ivars->doc_id = INT32_MAX;
    ivars->bubble_doc    = INT32_MAX;
    bumped_ivars->score  = ivars->need_score ? F32_NEGINF : F32_NAN;
    ivars->bubble_score  = ivars->need_score ? F32_NEGINF : F32_NAN;
    ivars->actions       = ivars->auto_actions;

    // Obtain sort caches.  Derive actions array for this segment.
    if (ivars->need_values && sort_reader) {
        for (uint32_t i = 0, max = ivars->num_rules; i < max; i++) {
            SortRule  *rule  = (SortRule*)VA_Fetch(ivars->rules, i);
            CharBuf   *field = SortRule_Get_Field(rule);
            // Rules without a field have no cache.
            SortCache *cache
                = field
                  ? SortReader_Fetch_Sort_Cache(sort_reader, field)
                  : NULL;
            ivars->sort_caches[i]     = cache;
            ivars->derived_actions[i] = S_derive_action(rule, cache);
            ivars->ord_arrays[i]      = cache
                                        ? SortCache_Get_Ords(cache)
                                        : NULL;
        }
    }

    ivars->seg_doc_max = reader ? SegReader_Doc_Max(reader) : 0;
    Coll_set_reader((Collector*)self, reader);
}
/*
 * Merge the sort caches of an existing segment into the segment being
 * written.
 *
 * For every field in the schema that has a sort cache in `reader`, the
 * matching SortFieldWriter is lazily created and asked to absorb the cache
 * using `doc_map`.  `flush_at_finish` is set once any cache has been added.
 *
 * The SortReader lookup depends only on `reader`, so it is done once up
 * front rather than once per field; segments with no SortReader skip the
 * field loop entirely.
 */
void
SortWriter_add_segment(SortWriter *self, SegReader *reader,
                       I32Array *doc_map) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    VArray *fields = Schema_All_Fields(ivars->schema);

    // Loop-invariant: the segment's SortReader does not vary by field.
    SortReader *sort_reader
        = (SortReader*)SegReader_Fetch(reader, VTable_Get_Name(SORTREADER));

    if (sort_reader) {
        // Proceed field-at-a-time, rather than doc-at-a-time.
        for (uint32_t i = 0, max = VA_Get_Size(fields); i < max; i++) {
            CharBuf   *field = (CharBuf*)VA_Fetch(fields, i);
            SortCache *cache
                = SortReader_Fetch_Sort_Cache(sort_reader, field);
            if (!cache) {
                continue;
            }
            int32_t field_num = Seg_Field_Num(ivars->segment, field);
            SortFieldWriter *field_writer
                = S_lazy_init_field_writer(self, field_num);
            SortFieldWriter_Add_Segment(field_writer, reader, doc_map,
                                        cache);
            ivars->flush_at_finish = true;
        }
    }

    DECREF(fields);
}
/*
 * Build a Matcher for the parent TermQuery's field/term within one segment.
 *
 * Returns NULL when the segment has no PostingListReader, no PostingList
 * for the term, or a PostingList whose doc freq is zero.  The PostingList
 * obtained here is released before returning in every case.
 */
Matcher*
TermCompiler_Make_Matcher_IMP(TermCompiler *self, SegReader *reader,
                              bool need_score) {
    TermCompilerIVARS *const ivars = TermCompiler_IVARS(self);
    TermQueryIVARS *const query_ivars
        = TermQuery_IVARS((TermQuery*)ivars->parent);

    PostingListReader *postings_reader
        = (PostingListReader*)SegReader_Fetch(
              reader, Class_Get_Name(POSTINGLISTREADER));
    PostingList *postings = NULL;
    if (postings_reader) {
        postings = PListReader_Posting_List(postings_reader,
                                            query_ivars->field,
                                            query_ivars->term);
    }

    // Only a non-empty posting list yields a matcher.
    Matcher *matcher = NULL;
    if (postings != NULL && PList_Get_Doc_Freq(postings) != 0) {
        matcher = PList_Make_Matcher(postings, ivars->sim, (Compiler*)self,
                                     need_score);
    }

    DECREF(postings);
    return matcher;
}
/*
 * Absorb an existing index into the one being written.
 *
 * `index` must be either a Folder or a CharBuf (handed to FSFolder_new);
 * any other type is reported via THROW.  The other index's schema is merged
 * into ours, its fields are registered with our Segment, and each of its
 * segments is added through the SegWriter with a doc map generated by the
 * DeletionsWriter.
 *
 * The folder reference acquired here is released on the "no data" error
 * path as well as on success, so a failed call does not leak it.
 */
void
Indexer_add_index(Indexer *self, Obj *index) {
    Folder      *other_folder = NULL;
    IndexReader *reader       = NULL;

    if (Obj_Is_A(index, FOLDER)) {
        other_folder = (Folder*)INCREF(index);
    }
    else if (Obj_Is_A(index, CHARBUF)) {
        other_folder = (Folder*)FSFolder_new((CharBuf*)index);
    }
    else {
        THROW(ERR, "Invalid type for 'index': %o", Obj_Get_Class_Name(index));
    }

    reader = IxReader_open((Obj*)other_folder, NULL, NULL);
    if (reader == NULL) {
        // Drop our folder reference before raising; the cleanup at the
        // bottom of this function is not expected to run after THROW.
        DECREF(other_folder);
        other_folder = NULL;
        THROW(ERR, "Index doesn't seem to contain any data");
    }
    else {
        Schema *schema       = self->schema;
        Schema *other_schema = IxReader_Get_Schema(reader);
        VArray *other_fields = Schema_All_Fields(other_schema);
        VArray *seg_readers  = IxReader_Seg_Readers(reader);
        uint32_t i, max;

        // Validate schema compatibility and add fields.
        Schema_Eat(schema, other_schema);

        // Add fields to Segment.
        for (i = 0, max = VA_Get_Size(other_fields); i < max; i++) {
            CharBuf *other_field = (CharBuf*)VA_Fetch(other_fields, i);
            Seg_Add_Field(self->segment, other_field);
        }
        DECREF(other_fields);

        // Add all segments.
        for (i = 0, max = VA_Get_Size(seg_readers); i < max; i++) {
            SegReader *seg_reader = (SegReader*)VA_Fetch(seg_readers, i);
            DeletionsReader *del_reader
                = (DeletionsReader*)SegReader_Fetch(
                      seg_reader, VTable_Get_Name(DELETIONSREADER));
            Matcher *deletions = del_reader
                                 ? DelReader_Iterator(del_reader)
                                 : NULL;
            // The current doc count of our segment is passed as the
            // final argument when generating the doc map.
            I32Array *doc_map = DelWriter_Generate_Doc_Map(
                                    self->del_writer, deletions,
                                    SegReader_Doc_Max(seg_reader),
                                    (int32_t)Seg_Get_Count(self->segment));
            SegWriter_Add_Segment(self->seg_writer, seg_reader, doc_map);
            DECREF(deletions);
            DECREF(doc_map);
        }
        DECREF(seg_readers);
    }

    DECREF(reader);
    DECREF(other_folder);
}
/*
 * Absorb an existing index into the one being written.
 *
 * `index` must be either a Folder or a String (handed to FSFolder_new);
 * any other type is reported via THROW.  The other index's schema is merged
 * into ours, its fields are registered with our Segment, and each of its
 * segments is added through the SegWriter with a doc map generated by the
 * DeletionsWriter.
 *
 * The folder reference acquired here is released on the "no data" error
 * path as well as on success, so a failed call does not leak it.
 */
void
Indexer_Add_Index_IMP(Indexer *self, Obj *index) {
    IndexerIVARS *const ivars = Indexer_IVARS(self);
    Folder      *other_folder = NULL;
    IndexReader *reader       = NULL;

    if (Obj_is_a(index, FOLDER)) {
        other_folder = (Folder*)INCREF(index);
    }
    else if (Obj_is_a(index, STRING)) {
        other_folder = (Folder*)FSFolder_new((String*)index);
    }
    else {
        THROW(ERR, "Invalid type for 'index': %o", Obj_get_class_name(index));
    }

    reader = IxReader_open((Obj*)other_folder, NULL, NULL);
    if (reader == NULL) {
        // Drop our folder reference before raising; the cleanup at the
        // bottom of this function is not expected to run after THROW.
        DECREF(other_folder);
        other_folder = NULL;
        THROW(ERR, "Index doesn't seem to contain any data");
    }
    else {
        Schema *schema       = ivars->schema;
        Schema *other_schema = IxReader_Get_Schema(reader);
        Vector *other_fields = Schema_All_Fields(other_schema);
        Vector *seg_readers  = IxReader_Seg_Readers(reader);

        // Validate schema compatibility and add fields.
        Schema_Eat(schema, other_schema);

        // Add fields to Segment.
        for (size_t i = 0, max = Vec_Get_Size(other_fields); i < max; i++) {
            String *other_field = (String*)Vec_Fetch(other_fields, i);
            Seg_Add_Field(ivars->segment, other_field);
        }
        DECREF(other_fields);

        // Add all segments.
        for (size_t i = 0, max = Vec_Get_Size(seg_readers); i < max; i++) {
            SegReader *seg_reader = (SegReader*)Vec_Fetch(seg_readers, i);
            DeletionsReader *del_reader
                = (DeletionsReader*)SegReader_Fetch(
                      seg_reader, Class_Get_Name(DELETIONSREADER));
            Matcher *deletions = del_reader
                                 ? DelReader_Iterator(del_reader)
                                 : NULL;
            // The current doc count of our segment is passed as the
            // final argument when generating the doc map.
            I32Array *doc_map = DelWriter_Generate_Doc_Map(
                                    ivars->del_writer, deletions,
                                    SegReader_Doc_Max(seg_reader),
                                    (int32_t)Seg_Get_Count(ivars->segment));
            SegWriter_Add_Segment(ivars->seg_writer, seg_reader, doc_map);
            DECREF(deletions);
            DECREF(doc_map);
        }
        DECREF(seg_readers);
    }

    DECREF(reader);
    DECREF(other_folder);
}
/*
 * Build a PhraseScorer for one segment.
 *
 * Returns NULL when the phrase has no terms, when the field's posting type
 * is missing or is not a SCOREPOSTING, when the segment has no
 * PostingsReader, or when any term lacks a posting list with a non-zero
 * doc freq.  `need_score` is ignored.
 */
Matcher*
PhraseCompiler_make_matcher(PhraseCompiler *self, SegReader *reader,
                            bool_t need_score) {
    PostingsReader *const postings_reader
        = (PostingsReader*)SegReader_Fetch(reader, POSTINGSREADER.name);
    PhraseQuery *const query       = (PhraseQuery*)self->parent;
    VArray      *const query_terms = query->terms;
    u32_t        term_count        = VA_Get_Size(query_terms);
    Schema      *seg_schema        = SegReader_Get_Schema(reader);
    Posting     *field_posting
        = Schema_Fetch_Posting(seg_schema, query->field);
    VArray      *term_plists;
    Matcher     *matcher;
    u32_t        tick;
    UNUSED_VAR(need_score);

    /* An empty phrase matches nothing. */
    if (term_count == 0) {
        return NULL;
    }

    /* The field must exist and its posting type must support positions. */
    if (field_posting == NULL) {
        return NULL;
    }
    if (!OBJ_IS_A(field_posting, SCOREPOSTING)) {
        return NULL;
    }

    /* This segment carries no postings at all. */
    if (!post_reader_is_null_guard_placeholder(postings_reader)) {
        return NULL;
    }

    /* Collect one posting list per term, in phrase order. */
    term_plists = VA_new(term_count);
    for (tick = 0; tick < term_count; tick++) {
        Obj *term = VA_Fetch(query_terms, tick);
        PostingList *term_plist
            = PostReader_Posting_List(postings_reader, query->field, term);

        if (term_plist && PList_Get_Doc_Freq(term_plist)) {
            VA_Push(term_plists, (Obj*)term_plist);
            continue;
        }

        /* A term absent from the index means the phrase cannot match. */
        DECREF(term_plist);
        DECREF(term_plists);
        return NULL;
    }

    matcher = (Matcher*)PhraseScorer_new(
                  Compiler_Get_Similarity(self),
                  term_plists,
                  (Compiler*)self
              );
    DECREF(term_plists);
    return matcher;
}
/*
 * Build a PhraseMatcher for one segment.
 *
 * Returns NULL when the phrase has no terms, when the Similarity's posting
 * type is missing or is not a SCOREPOSTING, when the segment has no
 * PostingListReader, or when any term lacks a posting list with a non-zero
 * doc freq.  `need_score` is ignored.
 */
Matcher*
PhraseCompiler_Make_Matcher_IMP(PhraseCompiler *self, SegReader *reader,
                                bool need_score) {
    UNUSED_VAR(need_score);
    PhraseCompilerIVARS *const ivars = PhraseCompiler_IVARS(self);
    PhraseQueryIVARS *const query_ivars
        = PhraseQuery_IVARS((PhraseQuery*)ivars->parent);
    Vector *const query_terms = query_ivars->terms;
    uint32_t term_count = Vec_Get_Size(query_terms);

    // An empty phrase matches nothing.
    if (term_count == 0) {
        return NULL;
    }

    // The posting type must exist and support positions.  The probe
    // Posting is only needed for the type check, so release it right away.
    Similarity *similarity = PhraseCompiler_Get_Similarity(self);
    Posting *probe = Sim_Make_Posting(similarity);
    const bool has_positions
        = probe != NULL && Obj_is_a((Obj*)probe, SCOREPOSTING);
    DECREF(probe);
    if (!has_positions) {
        return NULL;
    }

    // This segment carries no posting lists at all.
    PostingListReader *const postings_reader
        = (PostingListReader*)SegReader_Fetch(
              reader, Class_Get_Name(POSTINGLISTREADER));
    if (postings_reader == NULL) {
        return NULL;
    }

    // Collect one posting list per term, in phrase order.
    Vector *term_plists = Vec_new(term_count);
    for (uint32_t tick = 0; tick < term_count; tick++) {
        Obj *term = Vec_Fetch(query_terms, tick);
        PostingList *term_plist
            = PListReader_Posting_List(postings_reader, query_ivars->field,
                                       term);

        if (term_plist && PList_Get_Doc_Freq(term_plist)) {
            Vec_Push(term_plists, (Obj*)term_plist);
            continue;
        }

        // A term absent from the index means the phrase cannot match.
        DECREF(term_plist);
        DECREF(term_plists);
        return NULL;
    }

    Matcher *matcher
        = (Matcher*)PhraseMatcher_new(similarity, term_plists,
                                      (Compiler*)self);
    DECREF(term_plists);
    return matcher;
}
/*
 * Initialize a DefaultDeletionsWriter.
 *
 * After the DataWriter base initialization, this captures the PolyReader's
 * segment readers and offsets, creates an IndexSearcher over the
 * PolyReader, and prepares per-segment state:
 *   - bit_vecs:     one BitVector per segment, pre-populated with the
 *                   deletions reported by the segment's DeletionsReader
 *                   (if it has one);
 *   - updated:      a zero-initialized flag per segment;
 *   - name_to_tick: maps each segment name to its position in seg_readers.
 *
 * Returns `self`.
 */
DefaultDeletionsWriter*
DefDelWriter_init(DefaultDeletionsWriter *self, Schema *schema,
                  Snapshot *snapshot, Segment *segment,
                  PolyReader *polyreader) {
    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);

    ivars->seg_readers = PolyReader_Seg_Readers(polyreader);
    uint32_t seg_count = VA_Get_Size(ivars->seg_readers);
    ivars->seg_starts   = PolyReader_Offsets(polyreader);
    ivars->bit_vecs     = VA_new(seg_count);
    ivars->updated      = (bool*)CALLOCATE(seg_count, sizeof(bool));
    ivars->searcher     = IxSearcher_new((Obj*)polyreader);
    ivars->name_to_tick = Hash_new(seg_count);

    // Materialize a BitVector of deletions for each segment.
    for (uint32_t tick = 0; tick < seg_count; tick++) {
        SegReader *seg_reader
            = (SegReader*)VA_Fetch(ivars->seg_readers, tick);
        BitVector *deleted = BitVec_new(SegReader_Doc_Max(seg_reader));
        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Fetch(
                  seg_reader, Class_Get_Name(DELETIONSREADER));
        Matcher *existing_dels = NULL;
        if (del_reader) {
            existing_dels = DelReader_Iterator(del_reader);
        }

        // Copy already-recorded deletions; a doc id of 0 ends iteration.
        if (existing_dels) {
            for (int32_t doc_id = Matcher_Next(existing_dels);
                 doc_id != 0;
                 doc_id = Matcher_Next(existing_dels)
                ) {
                BitVec_Set(deleted, doc_id);
            }
            DECREF(existing_dels);
        }

        VA_Store(ivars->bit_vecs, tick, (Obj*)deleted);
        Hash_Store(ivars->name_to_tick,
                   (Obj*)SegReader_Get_Seg_Name(seg_reader),
                   (Obj*)Int32_new(tick));
    }

    return self;
}