Пример #1
0
void
PostPool_Add_Segment_IMP(PostingPool *self, SegReader *reader,
                         I32Array *doc_map, int32_t doc_base) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    LexiconReader *lex_reader = (LexiconReader*)SegReader_Fetch(
                                    reader, Class_Get_Name(LEXICONREADER));
    Lexicon *lexicon = lex_reader
                       ? LexReader_Lexicon(lex_reader, ivars->field, NULL)
                       : NULL;

    if (lexicon) {
        PostingListReader *plist_reader
            = (PostingListReader*)SegReader_Fetch(
                  reader, Class_Get_Name(POSTINGLISTREADER));
        PostingList *plist = plist_reader
                             ? PListReader_Posting_List(plist_reader, ivars->field, NULL)
                             : NULL;
        if (!plist) {
            THROW(ERR, "Got a Lexicon but no PostingList for '%o' in '%o'",
                  ivars->field, SegReader_Get_Seg_Name(reader));
        }
        PostingPool *run
            = PostPool_new(ivars->schema, ivars->snapshot, ivars->segment,
                           ivars->polyreader, ivars->field, ivars->lex_writer,
                           ivars->mem_pool, ivars->lex_temp_out,
                           ivars->post_temp_out, ivars->skip_out);
        PostingPoolIVARS *const run_ivars = PostPool_IVARS(run);
        run_ivars->lexicon  = lexicon;
        run_ivars->plist    = plist;
        run_ivars->doc_base = doc_base;
        run_ivars->doc_map  = (I32Array*)INCREF(doc_map);
        PostPool_Add_Run(self, (SortExternal*)run);
    }
}
Пример #2
0
void
DefDelWriter_Delete_By_Term_IMP(DefaultDeletionsWriter *self,
                                String *field, Obj *term) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    for (uint32_t i = 0, max = VA_Get_Size(ivars->seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(ivars->seg_readers, i);
        PostingListReader *plist_reader
            = (PostingListReader*)SegReader_Fetch(
                  seg_reader, Class_Get_Name(POSTINGLISTREADER));
        BitVector *bit_vec = (BitVector*)VA_Fetch(ivars->bit_vecs, i);
        PostingList *plist = plist_reader
                             ? PListReader_Posting_List(plist_reader, field, term)
                             : NULL;
        int32_t doc_id;
        int32_t num_zapped = 0;

        // Iterate through postings, marking each doc as deleted.
        if (plist) {
            while (0 != (doc_id = PList_Next(plist))) {
                num_zapped += !BitVec_Get(bit_vec, doc_id);
                BitVec_Set(bit_vec, doc_id);
            }
            if (num_zapped) { ivars->updated[i] = true; }
            DECREF(plist);
        }
    }
}
Пример #3
0
Matcher*
RangeCompiler_Make_Matcher_IMP(RangeCompiler *self, SegReader *reader,
                               bool need_score) {
    RangeQuery *parent = (RangeQuery*)RangeCompiler_IVARS(self)->parent;
    String *field = RangeQuery_IVARS(parent)->field;
    SortReader *sort_reader
        = (SortReader*)SegReader_Fetch(reader, Class_Get_Name(SORTREADER));
    SortCache *sort_cache = sort_reader
                            ? SortReader_Fetch_Sort_Cache(sort_reader, field)
                            : NULL;
    UNUSED_VAR(need_score);

    if (!sort_cache) {
        return NULL;
    }
    else {
        int32_t lower = S_find_lower_bound(self, sort_cache);
        int32_t upper = S_find_upper_bound(self, sort_cache);
        int32_t max_ord = SortCache_Get_Cardinality(sort_cache) + 1;
        if (lower > max_ord || upper < 0) {
            return NULL;
        }
        else {
            int32_t doc_max = SegReader_Doc_Max(reader);
            return (Matcher*)RangeMatcher_new(lower, upper, sort_cache,
                                              doc_max);
        }
    }
}
Пример #4
0
void
SortColl_set_reader(SortCollector *self, SegReader *reader) {
    SortCollectorIVARS *const ivars = SortColl_IVARS(self);
    SortReader *sort_reader
        = (SortReader*)SegReader_Fetch(reader, VTable_Get_Name(SORTREADER));

    // Reset threshold variables and trigger auto-action behavior.
    MatchDocIVARS *const bumped_ivars = MatchDoc_IVARS(ivars->bumped);
    bumped_ivars->doc_id = INT32_MAX;
    ivars->bubble_doc    = INT32_MAX;
    bumped_ivars->score  = ivars->need_score ? F32_NEGINF : F32_NAN;
    ivars->bubble_score  = ivars->need_score ? F32_NEGINF : F32_NAN;
    ivars->actions       = ivars->auto_actions;

    // Obtain sort caches. Derive actions array for this segment.
    if (ivars->need_values && sort_reader) {
        for (uint32_t i = 0, max = ivars->num_rules; i < max; i++) {
            SortRule  *rule  = (SortRule*)VA_Fetch(ivars->rules, i);
            CharBuf   *field = SortRule_Get_Field(rule);
            SortCache *cache = field
                               ? SortReader_Fetch_Sort_Cache(sort_reader, field)
                               : NULL;
            ivars->sort_caches[i] = cache;
            ivars->derived_actions[i] = S_derive_action(rule, cache);
            if (cache) { ivars->ord_arrays[i] = SortCache_Get_Ords(cache); }
            else       { ivars->ord_arrays[i] = NULL; }
        }
    }
    ivars->seg_doc_max = reader ? SegReader_Doc_Max(reader) : 0;
    Coll_set_reader((Collector*)self, reader);
}
Пример #5
0
void
SortWriter_add_segment(SortWriter *self, SegReader *reader,
                       I32Array *doc_map) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    VArray *fields = Schema_All_Fields(ivars->schema);

    // Proceed field-at-a-time, rather than doc-at-a-time.
    for (uint32_t i = 0, max = VA_Get_Size(fields); i < max; i++) {
        CharBuf *field = (CharBuf*)VA_Fetch(fields, i);
        SortReader *sort_reader = (SortReader*)SegReader_Fetch(
                                      reader, VTable_Get_Name(SORTREADER));
        SortCache *cache = sort_reader
                           ? SortReader_Fetch_Sort_Cache(sort_reader, field)
                           : NULL;
        if (cache) {
            int32_t field_num = Seg_Field_Num(ivars->segment, field);
            SortFieldWriter *field_writer
                = S_lazy_init_field_writer(self, field_num);
            SortFieldWriter_Add_Segment(field_writer, reader, doc_map, cache);
            ivars->flush_at_finish = true;
        }
    }

    DECREF(fields);
}
Пример #6
0
Matcher*
TermCompiler_Make_Matcher_IMP(TermCompiler *self, SegReader *reader,
                              bool need_score) {
    TermCompilerIVARS *const ivars = TermCompiler_IVARS(self);
    TermQueryIVARS *const parent_ivars
        = TermQuery_IVARS((TermQuery*)ivars->parent);
    PostingListReader *plist_reader
        = (PostingListReader*)SegReader_Fetch(
              reader, Class_Get_Name(POSTINGLISTREADER));
    PostingList *plist = plist_reader
                         ? PListReader_Posting_List(plist_reader,
                                                    parent_ivars->field,
                                                    parent_ivars->term)
                         : NULL;

    if (plist == NULL || PList_Get_Doc_Freq(plist) == 0) {
        DECREF(plist);
        return NULL;
    }
    else {
        Matcher *retval = PList_Make_Matcher(plist, ivars->sim,
                                             (Compiler*)self, need_score);
        DECREF(plist);
        return retval;
    }
}
Пример #7
0
void
Indexer_add_index(Indexer *self, Obj *index)
{
    Folder *other_folder = NULL;
    IndexReader *reader  = NULL;
    
    if (Obj_Is_A(index, FOLDER)) {
        other_folder = (Folder*)INCREF(index);
    }
    else if (Obj_Is_A(index, CHARBUF)) {
        other_folder = (Folder*)FSFolder_new((CharBuf*)index);
    }
    else {
        THROW(ERR, "Invalid type for 'index': %o", Obj_Get_Class_Name(index));
    }

    reader = IxReader_open((Obj*)other_folder, NULL, NULL);
    if (reader == NULL) {
        THROW(ERR, "Index doesn't seem to contain any data");
    }
    else {
        Schema *schema       = self->schema;
        Schema *other_schema = IxReader_Get_Schema(reader);
        VArray *other_fields = Schema_All_Fields(other_schema);
        VArray *seg_readers  = IxReader_Seg_Readers(reader);
        uint32_t i, max;

        // Validate schema compatibility and add fields. 
        Schema_Eat(schema, other_schema);

        // Add fields to Segment. 
        for (i = 0, max = VA_Get_Size(other_fields); i < max; i++) {
            CharBuf *other_field = (CharBuf*)VA_Fetch(other_fields, i);
            Seg_Add_Field(self->segment, other_field);
        }
        DECREF(other_fields);

        // Add all segments. 
        for (i = 0, max = VA_Get_Size(seg_readers); i < max; i++) {
            SegReader *seg_reader = (SegReader*)VA_Fetch(seg_readers, i);
            DeletionsReader *del_reader = (DeletionsReader*)SegReader_Fetch(
                seg_reader, VTable_Get_Name(DELETIONSREADER));
            Matcher *deletions = del_reader 
                               ? DelReader_Iterator(del_reader) 
                               : NULL;
            I32Array *doc_map = DelWriter_Generate_Doc_Map(self->del_writer,
                deletions, SegReader_Doc_Max(seg_reader),
                (int32_t)Seg_Get_Count(self->segment) 
            );
            SegWriter_Add_Segment(self->seg_writer, seg_reader, doc_map);
            DECREF(deletions);
            DECREF(doc_map);
        }
        DECREF(seg_readers);
    }

    DECREF(reader);
    DECREF(other_folder);
}
Пример #8
0
void
Indexer_Add_Index_IMP(Indexer *self, Obj *index) {
    IndexerIVARS *const ivars = Indexer_IVARS(self);
    Folder *other_folder = NULL;
    IndexReader *reader  = NULL;

    if (Obj_is_a(index, FOLDER)) {
        other_folder = (Folder*)INCREF(index);
    }
    else if (Obj_is_a(index, STRING)) {
        other_folder = (Folder*)FSFolder_new((String*)index);
    }
    else {
        THROW(ERR, "Invalid type for 'index': %o", Obj_get_class_name(index));
    }

    reader = IxReader_open((Obj*)other_folder, NULL, NULL);
    if (reader == NULL) {
        THROW(ERR, "Index doesn't seem to contain any data");
    }
    else {
        Schema *schema       = ivars->schema;
        Schema *other_schema = IxReader_Get_Schema(reader);
        Vector *other_fields = Schema_All_Fields(other_schema);
        Vector *seg_readers  = IxReader_Seg_Readers(reader);

        // Validate schema compatibility and add fields.
        Schema_Eat(schema, other_schema);

        // Add fields to Segment.
        for (size_t i = 0, max = Vec_Get_Size(other_fields); i < max; i++) {
            String *other_field = (String*)Vec_Fetch(other_fields, i);
            Seg_Add_Field(ivars->segment, other_field);
        }
        DECREF(other_fields);

        // Add all segments.
        for (size_t i = 0, max = Vec_Get_Size(seg_readers); i < max; i++) {
            SegReader *seg_reader = (SegReader*)Vec_Fetch(seg_readers, i);
            DeletionsReader *del_reader
                = (DeletionsReader*)SegReader_Fetch(
                      seg_reader, Class_Get_Name(DELETIONSREADER));
            Matcher *deletions = del_reader
                                 ? DelReader_Iterator(del_reader)
                                 : NULL;
            I32Array *doc_map = DelWriter_Generate_Doc_Map(
                                    ivars->del_writer, deletions,
                                    SegReader_Doc_Max(seg_reader),
                                    (int32_t)Seg_Get_Count(ivars->segment));
            SegWriter_Add_Segment(ivars->seg_writer, seg_reader, doc_map);
            DECREF(deletions);
            DECREF(doc_map);
        }
        DECREF(seg_readers);
    }

    DECREF(reader);
    DECREF(other_folder);
}
Пример #9
0
Matcher*
PhraseCompiler_make_matcher(PhraseCompiler *self, SegReader *reader,
                            bool_t need_score)
{
    PostingsReader *const post_reader = (PostingsReader*)SegReader_Fetch(
        reader, POSTINGSREADER.name);
    PhraseQuery *const parent = (PhraseQuery*)self->parent;
    VArray  *const terms      = parent->terms;
    u32_t    num_terms        = VA_Get_Size(terms);
    Schema  *schema           = SegReader_Get_Schema(reader);
    Posting *posting          = Schema_Fetch_Posting(schema, parent->field);
    VArray  *plists;
    Matcher *retval;
    u32_t i;
    UNUSED_VAR(need_score);

    /* Bail if there are no terms. */
    if (!num_terms) return NULL;

    /* Bail unless field is valid and posting type supports positions. */
    if (posting == NULL || !OBJ_IS_A(posting, SCOREPOSTING)) return NULL;

    /* Bail if there's no PostingsReader for this segment. */
    if (!post_reader) { return NULL; }

    /* Look up each term. */
    plists = VA_new(num_terms);
    for (i = 0; i < num_terms; i++) {
        Obj *term = VA_Fetch(terms, i);
        PostingList *plist 
            = PostReader_Posting_List(post_reader, parent->field, term);

        /* Bail if any one of the terms isn't in the index. */
        if (!plist || !PList_Get_Doc_Freq(plist)) {
            DECREF(plist);
            DECREF(plists);
            return NULL;
        }
        VA_Push(plists, (Obj*)plist);
    }

    retval = (Matcher*)PhraseScorer_new(
        Compiler_Get_Similarity(self),
        plists, 
        (Compiler*)self
    );
    DECREF(plists);

    return retval;
}
Пример #10
0
Matcher*
PhraseCompiler_Make_Matcher_IMP(PhraseCompiler *self, SegReader *reader,
                                bool need_score) {
    UNUSED_VAR(need_score);
    PhraseCompilerIVARS *const ivars = PhraseCompiler_IVARS(self);
    PhraseQueryIVARS *const parent_ivars
        = PhraseQuery_IVARS((PhraseQuery*)ivars->parent);
    Vector *const      terms     = parent_ivars->terms;
    uint32_t           num_terms = Vec_Get_Size(terms);

    // Bail if there are no terms.
    if (!num_terms) { return NULL; }

    // Bail unless field is valid and posting type supports positions.
    Similarity *sim     = PhraseCompiler_Get_Similarity(self);
    Posting    *posting = Sim_Make_Posting(sim);
    if (posting == NULL || !Obj_is_a((Obj*)posting, SCOREPOSTING)) {
        DECREF(posting);
        return NULL;
    }
    DECREF(posting);

    // Bail if there's no PostingListReader for this segment.
    PostingListReader *const plist_reader
        = (PostingListReader*)SegReader_Fetch(
              reader, Class_Get_Name(POSTINGLISTREADER));
    if (!plist_reader) { return NULL; }

    // Look up each term.
    Vector  *plists = Vec_new(num_terms);
    for (uint32_t i = 0; i < num_terms; i++) {
        Obj *term = Vec_Fetch(terms, i);
        PostingList *plist
            = PListReader_Posting_List(plist_reader, parent_ivars->field, term);

        // Bail if any one of the terms isn't in the index.
        if (!plist || !PList_Get_Doc_Freq(plist)) {
            DECREF(plist);
            DECREF(plists);
            return NULL;
        }
        Vec_Push(plists, (Obj*)plist);
    }

    Matcher *retval
        = (Matcher*)PhraseMatcher_new(sim, plists, (Compiler*)self);
    DECREF(plists);
    return retval;
}
Пример #11
0
DefaultDeletionsWriter*
DefDelWriter_init(DefaultDeletionsWriter *self, Schema *schema,
                  Snapshot *snapshot, Segment *segment,
                  PolyReader *polyreader) {

    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    ivars->seg_readers          = PolyReader_Seg_Readers(polyreader);
    uint32_t num_seg_readers    = VA_Get_Size(ivars->seg_readers);
    ivars->seg_starts           = PolyReader_Offsets(polyreader);
    ivars->bit_vecs             = VA_new(num_seg_readers);
    ivars->updated              = (bool*)CALLOCATE(num_seg_readers, sizeof(bool));
    ivars->searcher             = IxSearcher_new((Obj*)polyreader);
    ivars->name_to_tick         = Hash_new(num_seg_readers);

    // Materialize a BitVector of deletions for each segment.
    for (uint32_t i = 0; i < num_seg_readers; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(ivars->seg_readers, i);
        BitVector *bit_vec    = BitVec_new(SegReader_Doc_Max(seg_reader));
        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Fetch(
                  seg_reader, Class_Get_Name(DELETIONSREADER));
        Matcher *seg_dels = del_reader
                            ? DelReader_Iterator(del_reader)
                            : NULL;

        if (seg_dels) {
            int32_t del;
            while (0 != (del = Matcher_Next(seg_dels))) {
                BitVec_Set(bit_vec, del);
            }
            DECREF(seg_dels);
        }
        VA_Store(ivars->bit_vecs, i, (Obj*)bit_vec);
        Hash_Store(ivars->name_to_tick,
                   (Obj*)SegReader_Get_Seg_Name(seg_reader),
                   (Obj*)Int32_new(i));
    }

    return self;
}