コード例 #1
0
ファイル: PostingPool.c プロジェクト: github188/SimpleCode
/* Add the postings for this pool's field from an existing segment as a new
 * run.  Does nothing when the segment supplies no Lexicon for the field.
 *
 * Throws if a Lexicon is found but no PostingList can be obtained.
 */
void
PostPool_Add_Segment_IMP(PostingPool *self, SegReader *reader,
                         I32Array *doc_map, int32_t doc_base) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);

    // Look up the segment's Lexicon for our field, if there is one.
    LexiconReader *lex_reader = (LexiconReader*)SegReader_Fetch(
                                    reader, Class_Get_Name(LEXICONREADER));
    Lexicon *lexicon = NULL;
    if (lex_reader) {
        lexicon = LexReader_Lexicon(lex_reader, ivars->field, NULL);
    }
    if (!lexicon) {
        return;
    }

    // A Lexicon without a matching PostingList indicates a broken segment.
    PostingListReader *plist_reader
        = (PostingListReader*)SegReader_Fetch(
              reader, Class_Get_Name(POSTINGLISTREADER));
    PostingList *plist = NULL;
    if (plist_reader) {
        plist = PListReader_Posting_List(plist_reader, ivars->field, NULL);
    }
    if (!plist) {
        THROW(ERR, "Got a Lexicon but no PostingList for '%o' in '%o'",
              ivars->field, SegReader_Get_Seg_Name(reader));
    }

    // Build a sibling pool sharing our configuration, hand it the segment's
    // lexicon, posting list and doc mapping, then register it as a run.
    PostingPool *run
        = PostPool_new(ivars->schema, ivars->snapshot, ivars->segment,
                       ivars->polyreader, ivars->field, ivars->lex_writer,
                       ivars->mem_pool, ivars->lex_temp_out,
                       ivars->post_temp_out, ivars->skip_out);
    PostingPoolIVARS *const run_ivars = PostPool_IVARS(run);
    run_ivars->lexicon  = lexicon;
    run_ivars->plist    = plist;
    run_ivars->doc_base = doc_base;
    run_ivars->doc_map  = (I32Array*)INCREF(doc_map);
    PostPool_Add_Run(self, (SortExternal*)run);
}
コード例 #2
0
ファイル: Err.c プロジェクト: leckie711/lucy-clownfish
/* Verify that `obj` is a non-NULL instance of `klass` and return it.
 *
 * Throws (attributed to `file`, `line`, `func`) when `obj` is NULL or when it
 * is not an instance of `klass`.
 */
Obj*
Err_certify(Obj *obj, Class *klass, const char *file, int line,
            const char *func) {
    // Common case: the object exists and has the expected type.
    if (obj && SI_obj_is_a(obj, klass)) {
        return obj;
    }

    if (!obj) {
        Err_throw_at(ERR, file, line, func, "Object isn't a %o, it's NULL",
                     Class_Get_Name(klass));
    }
    else {
        Err_throw_at(ERR, file, line, func, "Can't downcast from %o to %o",
                     Obj_get_class_name(obj), Class_Get_Name(klass));
    }
    return obj;
}
コード例 #3
0
ファイル: RangeQuery.c プロジェクト: apache/lucy
/* Create a Matcher for the parent RangeQuery over a single segment.
 *
 * Returns NULL when the segment has no sort cache for the query's field or
 * when the computed ordinal bounds cannot match anything.  `need_score` is
 * ignored.
 */
Matcher*
RangeCompiler_Make_Matcher_IMP(RangeCompiler *self, SegReader *reader,
                               bool need_score) {
    RangeQuery *parent = (RangeQuery*)RangeCompiler_IVARS(self)->parent;
    String *field = RangeQuery_IVARS(parent)->field;
    SortReader *sort_reader
        = (SortReader*)SegReader_Fetch(reader, Class_Get_Name(SORTREADER));
    SortCache *sort_cache = NULL;
    if (sort_reader) {
        sort_cache = SortReader_Fetch_Sort_Cache(sort_reader, field);
    }
    UNUSED_VAR(need_score);

    // Without a sort cache there is nothing to match against.
    if (!sort_cache) {
        return NULL;
    }

    // Translate the range endpoints into ordinals within the sort cache.
    const int32_t lower   = S_find_lower_bound(self, sort_cache);
    const int32_t upper   = S_find_upper_bound(self, sort_cache);
    const int32_t max_ord = SortCache_Get_Cardinality(sort_cache) + 1;
    if (lower > max_ord || upper < 0) {
        return NULL;
    }

    const int32_t doc_max = SegReader_Doc_Max(reader);
    return (Matcher*)RangeMatcher_new(lower, upper, sort_cache, doc_max);
}
コード例 #4
0
ファイル: SortWriter.c プロジェクト: apache/lucy
/* Add the sort caches of an existing segment to the segment being written.
 *
 * For every schema field that has a sort cache in `reader`, the matching
 * SortFieldWriter is lazily created and fed the cache, and the writer is
 * flagged to flush at finish.  Segments without a SortReader are ignored.
 */
void
SortWriter_Add_Segment_IMP(SortWriter *self, SegReader *reader,
                           I32Array *doc_map) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);

    // The SortReader lookup does not depend on the field, so perform it once
    // rather than on every loop iteration.
    SortReader *sort_reader = (SortReader*)SegReader_Fetch(
                                  reader, Class_Get_Name(SORTREADER));
    if (!sort_reader) {
        return;
    }

    Vector *fields = Schema_All_Fields(ivars->schema);

    // Proceed field-at-a-time, rather than doc-at-a-time.
    for (size_t i = 0, max = Vec_Get_Size(fields); i < max; i++) {
        String *field = (String*)Vec_Fetch(fields, i);
        SortCache *cache = SortReader_Fetch_Sort_Cache(sort_reader, field);
        if (!cache) {
            continue;
        }
        int32_t field_num = Seg_Field_Num(ivars->segment, field);
        SortFieldWriter *field_writer
            = S_lazy_init_field_writer(self, field_num);
        SortFieldWriter_Add_Segment(field_writer, reader, doc_map, cache);
        ivars->flush_at_finish = true;
    }

    DECREF(fields);
}
コード例 #5
0
ファイル: SortCollector.c プロジェクト: carriercomm/lucy
/* Prepare the collector for a new segment: reset the threshold state, refresh
 * the per-rule sort caches and derived actions, record the segment's doc max,
 * then delegate to the superclass implementation.
 */
void
SortColl_Set_Reader_IMP(SortCollector *self, SegReader *reader) {
    SortCollectorIVARS *const ivars = SortColl_IVARS(self);
    SortReader *sort_reader
        = (SortReader*)SegReader_Fetch(reader, Class_Get_Name(SORTREADER));

    // Reset threshold variables and trigger auto-action behavior.
    MatchDocIVARS *const bumped_ivars = MatchDoc_IVARS(ivars->bumped);
    bumped_ivars->doc_id = INT32_MAX;
    ivars->bubble_doc    = INT32_MAX;
    if (ivars->need_score) {
        bumped_ivars->score = CHY_F32_NEGINF;
        ivars->bubble_score = CHY_F32_NEGINF;
    }
    else {
        bumped_ivars->score = CHY_F32_NAN;
        ivars->bubble_score = CHY_F32_NAN;
    }
    ivars->actions = ivars->auto_actions;

    // Obtain sort caches. Derive actions array for this segment.
    if (ivars->need_values && sort_reader) {
        const uint32_t num_rules = ivars->num_rules;
        for (uint32_t tick = 0; tick < num_rules; tick++) {
            SortRule *rule  = (SortRule*)Vec_Fetch(ivars->rules, tick);
            String   *field = SortRule_Get_Field(rule);

            // Rules without a field have no cache to consult.
            SortCache *cache = NULL;
            if (field) {
                cache = SortReader_Fetch_Sort_Cache(sort_reader, field);
            }

            ivars->sort_caches[tick]     = cache;
            ivars->derived_actions[tick] = S_derive_action(rule, cache);
            ivars->ord_arrays[tick]
                = cache ? SortCache_Get_Ords(cache) : NULL;
        }
    }

    if (reader) {
        ivars->seg_doc_max = SegReader_Doc_Max(reader);
    }
    else {
        ivars->seg_doc_max = 0;
    }

    // Chain up to the parent class's Set_Reader.
    SortColl_Set_Reader_t super_set_reader
        = (SortColl_Set_Reader_t)SUPER_METHOD_PTR(SORTCOLLECTOR,
                                                  LUCY_SortColl_Set_Reader);
    super_set_reader(self, reader);
}
コード例 #6
0
/* Return a Matcher over the deletions recorded for `seg_reader`'s segment.
 *
 * Returns NULL when the segment has neither updated deletions in this writer
 * nor any deletions reported by its DeletionsReader.  Throws if the segment
 * is unknown to this writer.
 */
Matcher*
DefDelWriter_Seg_Deletions_IMP(DefaultDeletionsWriter *self,
                               SegReader *seg_reader) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Matcher *deletions = NULL;

    // Map the segment's name to its position among our seg readers.
    String *seg_name = Seg_Get_Name(SegReader_Get_Segment(seg_reader));
    Integer32 *tick_obj
        = (Integer32*)Hash_Fetch(ivars->name_to_tick, (Obj*)seg_name);

    if (!tick_obj) { // Sanity check.
        THROW(ERR, "Couldn't find SegReader %o", seg_reader);
    }
    else {
        int32_t    tick      = Int32_Get_Value(tick_obj);
        SegReader *candidate = (SegReader*)VA_Fetch(ivars->seg_readers, tick);
        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Obtain(
                  candidate, Class_Get_Name(DELETIONSREADER));
        if (ivars->updated[tick] || DelReader_Del_Count(del_reader)) {
            BitVector *deldocs = (BitVector*)VA_Fetch(ivars->bit_vecs, tick);
            deletions = (Matcher*)BitVecMatcher_new(deldocs);
        }
    }

    return deletions;
}
コード例 #7
0
/* Mark every document containing `term` in `field` as deleted, across all
 * segments known to this writer.  A segment is flagged as updated only when
 * at least one previously undeleted document was marked.
 */
void
DefDelWriter_Delete_By_Term_IMP(DefaultDeletionsWriter *self,
                                String *field, Obj *term) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    const uint32_t num_readers = VA_Get_Size(ivars->seg_readers);

    for (uint32_t tick = 0; tick < num_readers; tick++) {
        SegReader *seg_reader
            = (SegReader*)VA_Fetch(ivars->seg_readers, tick);
        PostingListReader *plist_reader
            = (PostingListReader*)SegReader_Fetch(
                  seg_reader, Class_Get_Name(POSTINGLISTREADER));
        BitVector *bit_vec = (BitVector*)VA_Fetch(ivars->bit_vecs, tick);
        PostingList *plist = NULL;
        if (plist_reader) {
            plist = PListReader_Posting_List(plist_reader, field, term);
        }
        if (!plist) {
            continue;
        }

        // Iterate through postings, marking each doc as deleted.
        int32_t newly_deleted = 0;
        for (int32_t doc_id = PList_Next(plist);
             doc_id != 0;
             doc_id = PList_Next(plist)
            ) {
            if (!BitVec_Get(bit_vec, doc_id)) {
                newly_deleted++;
            }
            BitVec_Set(bit_vec, doc_id);
        }
        if (newly_deleted) {
            ivars->updated[tick] = true;
        }
        DECREF(plist);
    }
}
コード例 #8
0
ファイル: Freezer.c プロジェクト: rectang/lucy
/* Reconstitute an object of class `klass` from `dump` by dispatching to the
 * Load method of the first matching known base class (Analyzer, Doc,
 * Similarity, FieldType, Schema, Query).
 *
 * A throwaway instance is created solely to serve as the invocant for Load
 * and is released before returning.  Throws for any other class.
 */
static Obj*
S_load_via_load_method(Class *klass, Obj *dump) {
    Obj *invocant = Class_Make_Obj(klass);
    Obj *result   = NULL;

    if (Obj_is_a(invocant, ANALYZER)) {
        result = Analyzer_Load((Analyzer*)invocant, dump);
    }
    else if (Obj_is_a(invocant, DOC)) {
        result = (Obj*)Doc_Load((Doc*)invocant, dump);
    }
    else if (Obj_is_a(invocant, SIMILARITY)) {
        result = (Obj*)Sim_Load((Similarity*)invocant, dump);
    }
    else if (Obj_is_a(invocant, FIELDTYPE)) {
        result = FType_Load((FieldType*)invocant, dump);
    }
    else if (Obj_is_a(invocant, SCHEMA)) {
        result = (Obj*)Schema_Load((Schema*)invocant, dump);
    }
    else if (Obj_is_a(invocant, QUERY)) {
        result = Query_Load((Query*)invocant, dump);
    }
    else {
        // Release the invocant before raising the error.
        DECREF(invocant);
        THROW(ERR, "Don't know how to load '%o'", Class_Get_Name(klass));
    }

    DECREF(invocant);
    return result;
}
コード例 #9
0
ファイル: TermQuery.c プロジェクト: apache/lucy
/* Create a Matcher for the parent TermQuery's term over a single segment.
 *
 * Returns NULL when the segment has no PostingListReader, no posting list
 * for the term, or a posting list with a doc freq of zero.
 */
Matcher*
TermCompiler_Make_Matcher_IMP(TermCompiler *self, SegReader *reader,
                              bool need_score) {
    TermCompilerIVARS *const ivars = TermCompiler_IVARS(self);
    TermQueryIVARS *const parent_ivars
        = TermQuery_IVARS((TermQuery*)ivars->parent);
    PostingListReader *plist_reader
        = (PostingListReader*)SegReader_Fetch(
              reader, Class_Get_Name(POSTINGLISTREADER));

    PostingList *plist = NULL;
    if (plist_reader) {
        plist = PListReader_Posting_List(plist_reader, parent_ivars->field,
                                         parent_ivars->term);
    }

    // Only build a matcher when the term actually occurs in this segment.
    Matcher *retval = NULL;
    if (plist != NULL && PList_Get_Doc_Freq(plist) != 0) {
        retval = PList_Make_Matcher(plist, ivars->sim, (Compiler*)self,
                                    need_score);
    }

    // The posting list is released on every path.
    DECREF(plist);
    return retval;
}
コード例 #10
0
ファイル: SegReader.c プロジェクト: apache/lucy
/* Initialize a SegReader: run the IndexReader initializer, cache the
 * segment's doc max, name and number, initialize components, and record the
 * deletion count.
 *
 * If component initialization fails, `self` is released and the trapped
 * error is rethrown.
 */
SegReader*
SegReader_init(SegReader *self, Schema *schema, Folder *folder,
               Snapshot *snapshot, Vector *segments, int32_t seg_tick) {
    IxReader_init((IndexReader*)self, schema, folder, snapshot, segments,
                  seg_tick, NULL);
    SegReaderIVARS *const ivars = SegReader_IVARS(self);
    Segment *const segment = SegReader_Get_Segment(self);

    // Cache frequently used segment properties.
    ivars->doc_max  = (int32_t)Seg_Get_Count(segment);
    ivars->seg_name = (String*)INCREF(Seg_Get_Name(segment));
    ivars->seg_num  = Seg_Get_Number(segment);

    // Component initialization may throw, so run it under a trap.
    Err *error = Err_trap(S_try_init_components, self);
    if (error) {
        // An error occurred, so clean up self and rethrow the exception.
        DECREF(self);
        RETHROW(error);
    }

    // A segment without a DeletionsReader has no deletions.
    DeletionsReader *del_reader
        = (DeletionsReader*)Hash_Fetch(
              ivars->components, Class_Get_Name(DELETIONSREADER));
    if (del_reader) {
        ivars->del_count = DelReader_Del_Count(del_reader);
    }
    else {
        ivars->del_count = 0;
    }

    return self;
}
コード例 #11
0
ファイル: Indexer.c プロジェクト: rectang/lucy
/* Absorb the contents of another index into the one being written.
 *
 * `index` must be either a Folder or a String; a String is passed to
 * FSFolder_new to open a folder.  The other index's schema is merged into
 * ours, its fields are added to the current segment, and each of its
 * segments is added via the SegWriter with a doc map that accounts for
 * deletions.
 *
 * Throws if `index` has an unsupported type or if no reader can be opened
 * on the other index.
 */
void
Indexer_Add_Index_IMP(Indexer *self, Obj *index) {
    IndexerIVARS *const ivars = Indexer_IVARS(self);
    Folder *other_folder = NULL;
    IndexReader *reader  = NULL;

    if (Obj_is_a(index, FOLDER)) {
        other_folder = (Folder*)INCREF(index);
    }
    else if (Obj_is_a(index, STRING)) {
        other_folder = (Folder*)FSFolder_new((String*)index);
    }
    else {
        THROW(ERR, "Invalid type for 'index': %o", Obj_get_class_name(index));
    }

    reader = IxReader_open((Obj*)other_folder, NULL, NULL);
    if (reader == NULL) {
        // Release our reference to the folder before raising the error so
        // that it is not leaked on this path.
        DECREF(other_folder);
        THROW(ERR, "Index doesn't seem to contain any data");
    }
    else {
        Schema *schema       = ivars->schema;
        Schema *other_schema = IxReader_Get_Schema(reader);
        Vector *other_fields = Schema_All_Fields(other_schema);
        Vector *seg_readers  = IxReader_Seg_Readers(reader);

        // Validate schema compatibility and add fields.
        Schema_Eat(schema, other_schema);

        // Add fields to Segment.
        for (size_t i = 0, max = Vec_Get_Size(other_fields); i < max; i++) {
            String *other_field = (String*)Vec_Fetch(other_fields, i);
            Seg_Add_Field(ivars->segment, other_field);
        }
        DECREF(other_fields);

        // Add all segments.
        for (size_t i = 0, max = Vec_Get_Size(seg_readers); i < max; i++) {
            SegReader *seg_reader = (SegReader*)Vec_Fetch(seg_readers, i);
            DeletionsReader *del_reader
                = (DeletionsReader*)SegReader_Fetch(
                      seg_reader, Class_Get_Name(DELETIONSREADER));
            Matcher *deletions = del_reader
                                 ? DelReader_Iterator(del_reader)
                                 : NULL;
            // The current segment's doc count is passed as the fourth
            // argument when generating the doc map.
            I32Array *doc_map = DelWriter_Generate_Doc_Map(
                                    ivars->del_writer, deletions,
                                    SegReader_Doc_Max(seg_reader),
                                    (int32_t)Seg_Get_Count(ivars->segment));
            SegWriter_Add_Segment(ivars->seg_writer, seg_reader, doc_map);
            DECREF(deletions);
            DECREF(doc_map);
        }
        DECREF(seg_readers);
    }

    DECREF(reader);
    DECREF(other_folder);
}
コード例 #12
0
ファイル: Err.c プロジェクト: leckie711/lucy-clownfish
/* Verify that `obj`, if non-NULL, is an instance of `klass` and return it.
 *
 * Unlike Err_certify, a NULL `obj` is accepted and returned unchanged.
 * Throws (attributed to `file`, `line`, `func`) on a type mismatch.
 */
Obj*
Err_downcast(Obj *obj, Class *klass, const char *file, int line,
             const char *func) {
    // NULL passes through untouched.
    if (!obj) {
        return obj;
    }

    if (!SI_obj_is_a(obj, klass)) {
        Err_throw_at(ERR, file, line, func, "Can't downcast from %o to %o",
                     Obj_get_class_name(obj), Class_Get_Name(klass));
    }
    return obj;
}
コード例 #13
0
ファイル: TestObj.c プロジェクト: timwilkens/lucy-clownfish
/* Check Is_A, Get_Class and Get_Class_Name using an empty String. */
static void
test_Is_A(TestBatchRunner *runner) {
    String *subject = Str_new_from_trusted_utf8("", 0);
    Class  *klass   = Str_Get_Class(subject);
    String *name    = Str_Get_Class_Name(subject);

    // A String is both a String and an Obj.
    TEST_TRUE(runner, Str_Is_A(subject, STRING), "String Is_A String.");
    TEST_TRUE(runner, Str_Is_A(subject, OBJ), "String Is_A Obj.");

    // The class and its name must agree with the STRING class.
    TEST_TRUE(runner, klass == STRING, "Get_Class");
    TEST_TRUE(runner, Str_Equals(Class_Get_Name(STRING), (Obj*)name),
              "Get_Class_Name");

    DECREF(subject);
}
コード例 #14
0
/* Create a PhraseMatcher for the parent PhraseQuery over a single segment.
 *
 * Returns NULL when the phrase has no terms, when the Similarity's posting
 * is missing or is not a ScorePosting, when the segment has no
 * PostingListReader, or when any term has no postings in the segment.
 * `need_score` is ignored.
 */
Matcher*
PhraseCompiler_Make_Matcher_IMP(PhraseCompiler *self, SegReader *reader,
                                bool need_score) {
    UNUSED_VAR(need_score);
    PhraseCompilerIVARS *const ivars = PhraseCompiler_IVARS(self);
    PhraseQueryIVARS *const parent_ivars
        = PhraseQuery_IVARS((PhraseQuery*)ivars->parent);
    Vector *const terms     = parent_ivars->terms;
    uint32_t      num_terms = Vec_Get_Size(terms);

    // Bail if there are no terms.
    if (num_terms == 0) {
        return NULL;
    }

    // Bail unless field is valid and posting type supports positions.
    Similarity *sim     = PhraseCompiler_Get_Similarity(self);
    Posting    *posting = Sim_Make_Posting(sim);
    const bool  usable
        = posting != NULL && Obj_is_a((Obj*)posting, SCOREPOSTING);
    DECREF(posting);
    if (!usable) {
        return NULL;
    }

    // Bail if there's no PostingListReader for this segment.
    PostingListReader *const plist_reader
        = (PostingListReader*)SegReader_Fetch(
              reader, Class_Get_Name(POSTINGLISTREADER));
    if (!plist_reader) {
        return NULL;
    }

    // Look up each term, stopping at the first one missing from the index.
    Vector *plists    = Vec_new(num_terms);
    bool    all_found = true;
    for (uint32_t tick = 0; tick < num_terms; tick++) {
        Obj *term = Vec_Fetch(terms, tick);
        PostingList *plist
            = PListReader_Posting_List(plist_reader, parent_ivars->field, term);

        if (!plist || !PList_Get_Doc_Freq(plist)) {
            DECREF(plist);
            all_found = false;
            break;
        }
        Vec_Push(plists, (Obj*)plist);
    }

    Matcher *retval = NULL;
    if (all_found) {
        retval = (Matcher*)PhraseMatcher_new(sim, plists, (Compiler*)self);
    }
    DECREF(plists);
    return retval;
}
コード例 #15
0
ファイル: DeletionsWriter.c プロジェクト: apache/lucy
/* Account for the deletions files carried by a segment that is about to be
 * merged away.
 *
 * For each entry under "files" in the segment's "deletions" metadata, the
 * first seg reader whose segment name equals the entry's key is located.  If
 * the entry's "count" equals that reader's current deletion count, the
 * segment is flagged as updated.  `doc_map` is unused.
 */
void
DefDelWriter_Merge_Segment_IMP(DefaultDeletionsWriter *self,
                               SegReader *reader, I32Array *doc_map) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    UNUSED_VAR(doc_map);
    Segment *segment = SegReader_Get_Segment(reader);

    // Nothing to do unless the segment carries deletions metadata with files.
    Hash *del_meta = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "deletions", 9);
    if (!del_meta) {
        return;
    }
    Hash *files = (Hash*)Hash_Fetch_Utf8(del_meta, "files", 5);
    if (!files) {
        return;
    }

    Vector *seg_readers = ivars->seg_readers;
    const size_t num_readers = Vec_Get_Size(seg_readers);
    HashIterator *iter = HashIter_new(files);

    while (HashIter_Next(iter)) {
        String *target_name = HashIter_Get_Key(iter);
        Hash   *mini_meta   = (Hash*)HashIter_Get_Value(iter);

        /* Find the segment the deletions from the SegReader we're adding
         * correspond to.  If it's gone, we don't need to worry about losing
         * deletions files that point at it. */
        for (size_t tick = 0; tick < num_readers; tick++) {
            SegReader *candidate = (SegReader*)Vec_Fetch(seg_readers, tick);
            String *candidate_name
                = Seg_Get_Name(SegReader_Get_Segment(candidate));
            if (!Str_Equals(target_name, (Obj*)candidate_name)) {
                continue;
            }

            /* If the count hasn't changed, we're about to merge away the
             * most recent deletions file pointing at this target segment --
             * so force a new file to be written out. */
            Obj *count_obj = Hash_Fetch_Utf8(mini_meta, "count", 5);
            int32_t count = (int32_t)Json_obj_to_i64(count_obj);
            DeletionsReader *del_reader
                = (DeletionsReader*)SegReader_Obtain(
                      candidate, Class_Get_Name(DELETIONSREADER));
            if (count == DelReader_Del_Count(del_reader)) {
                ivars->updated[tick] = true;
            }
            break;
        }
    }

    DECREF(iter);
}
コード例 #16
0
/* Initialize a DefaultDeletionsWriter.
 *
 * After the DataWriter initializer runs, per-segment state is set up from
 * `polyreader`: the seg readers and their offsets, one BitVector of existing
 * deletions per segment, an `updated` flag array (zeroed), a searcher, and a
 * map from segment name to the segment's index.
 */
DefaultDeletionsWriter*
DefDelWriter_init(DefaultDeletionsWriter *self, Schema *schema,
                  Snapshot *snapshot, Segment *segment,
                  PolyReader *polyreader) {

    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);

    ivars->seg_readers = PolyReader_Seg_Readers(polyreader);
    const uint32_t num_seg_readers = VA_Get_Size(ivars->seg_readers);
    ivars->seg_starts   = PolyReader_Offsets(polyreader);
    ivars->bit_vecs     = VA_new(num_seg_readers);
    ivars->updated      = (bool*)CALLOCATE(num_seg_readers, sizeof(bool));
    ivars->searcher     = IxSearcher_new((Obj*)polyreader);
    ivars->name_to_tick = Hash_new(num_seg_readers);

    // Materialize a BitVector of deletions for each segment.
    for (uint32_t tick = 0; tick < num_seg_readers; tick++) {
        SegReader *seg_reader
            = (SegReader*)VA_Fetch(ivars->seg_readers, tick);
        BitVector *bit_vec = BitVec_new(SegReader_Doc_Max(seg_reader));
        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Fetch(
                  seg_reader, Class_Get_Name(DELETIONSREADER));

        Matcher *seg_dels = NULL;
        if (del_reader) {
            seg_dels = DelReader_Iterator(del_reader);
        }

        // Copy each existing deletion into the bit vector.
        if (seg_dels) {
            for (int32_t del = Matcher_Next(seg_dels);
                 del != 0;
                 del = Matcher_Next(seg_dels)
                ) {
                BitVec_Set(bit_vec, del);
            }
            DECREF(seg_dels);
        }

        VA_Store(ivars->bit_vecs, tick, (Obj*)bit_vec);
        Hash_Store(ivars->name_to_tick,
                   (Obj*)SegReader_Get_Seg_Name(seg_reader),
                   (Obj*)Int32_new(tick));
    }

    return self;
}
コード例 #17
0
/* Copy the raw highlight records of an existing segment into the segment
 * being written, skipping documents that `doc_map` maps to 0.  A NULL
 * `doc_map` means every document is copied.  Empty segments are ignored.
 */
void
HLWriter_Add_Segment_IMP(HighlightWriter *self, SegReader *reader,
                         I32Array *doc_map) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);
    const int32_t doc_max = SegReader_Doc_Max(reader);

    // Bail if the supplied segment is empty.
    if (doc_max == 0) {
        return;
    }

    DefaultHighlightReader *hl_reader
        = (DefaultHighlightReader*)CERTIFY(
              SegReader_Obtain(reader, Class_Get_Name(HIGHLIGHTREADER)),
              DEFAULTHIGHLIGHTREADER);
    // ix_out is read from ivars only after S_lazy_init has run.
    OutStream *dat_out = S_lazy_init(self);
    OutStream *ix_out  = ivars->ix_out;
    ByteBuf   *record  = BB_new(0);

    for (int32_t doc_id = 1; doc_id <= doc_max; doc_id++) {
        // Skip deleted docs.
        if (doc_map && !I32Arr_Get(doc_map, doc_id)) {
            continue;
        }

        // Write file pointer.
        OutStream_Write_I64(ix_out, OutStream_Tell(dat_out));

        // Copy the raw record.
        DefHLReader_Read_Record(hl_reader, doc_id, record);
        OutStream_Write_Bytes(dat_out, BB_Get_Buf(record),
                              BB_Get_Size(record));

        // Empty the buffer so it can be reused for the next record.
        BB_Set_Size(record, 0);
    }

    DECREF(record);
}
コード例 #18
0
ファイル: Class.c プロジェクト: rectang/lucy-clownfish
/* Allocate and set up a new Class named `name` deriving from `parent`.
 *
 * The subclass copies the parent's flags, allocation sizes and vtable, and
 * starts with a single zeroed slot in its methods array.  Throws if `parent`
 * carries the CFISH_fFINAL flag.
 */
static Class*
S_simple_subclass(Class *parent, String *name) {
    if (parent->flags & CFISH_fFINAL) {
        THROW(ERR, "Can't subclass final class %o", Class_Get_Name(parent));
    }

    // Class structs are variable-sized; allocate as much as the parent uses.
    const size_t alloc_size = parent->class_alloc_size;
    Class *subclass = (Class*)Memory_wrapped_calloc(alloc_size, 1);
    Class_Init_Obj(parent->klass, subclass);

    subclass->parent           = parent;
    subclass->flags            = parent->flags;
    subclass->obj_alloc_size   = parent->obj_alloc_size;
    subclass->class_alloc_size = alloc_size;
    subclass->methods          = (Method**)CALLOCATE(1, sizeof(Method*));

    S_set_name(subclass, Str_Get_Ptr8(name), Str_Get_Size(name));

    // Copy everything from the vtable member to the end of the parent's
    // allocation.
    const size_t vtable_bytes = alloc_size - offsetof(Class, vtable);
    memcpy(subclass->vtable, parent->vtable, vtable_bytes);

    return subclass;
}
コード例 #19
0
ファイル: DocWriter.c プロジェクト: apache/lucy
/* Copy the raw document records of an existing segment into the segment
 * being written, skipping documents that `doc_map` maps to 0.
 *
 * A NULL `doc_map` is treated as "no deletions", so every document is
 * copied; this matches the handling in HLWriter_Add_Segment_IMP.  Empty
 * segments are ignored.
 */
void
DocWriter_Add_Segment_IMP(DocWriter *self, SegReader *reader,
                          I32Array *doc_map) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    const int32_t doc_max = SegReader_Doc_Max(reader);

    // Bail if the supplied segment is empty.
    if (doc_max == 0) {
        return;
    }

    // ix_out is read from ivars only after S_lazy_init has run.
    OutStream *const dat_out = S_lazy_init(self);
    OutStream *const ix_out  = ivars->ix_out;
    ByteBuf   *const buffer  = BB_new(0);
    DefaultDocReader *const doc_reader
        = (DefaultDocReader*)CERTIFY(
              SegReader_Obtain(reader, Class_Get_Name(DOCREADER)),
              DEFAULTDOCREADER);

    for (int32_t i = 1; i <= doc_max; i++) {
        // Skip deleted docs.  Without a doc map, nothing is deleted.
        if (doc_map && !I32Arr_Get(doc_map, (size_t)i)) {
            continue;
        }

        int64_t start = OutStream_Tell(dat_out);

        // Copy record over.
        DefDocReader_Read_Record(doc_reader, buffer, i);
        const char *buf  = BB_Get_Buf(buffer);
        size_t      size = BB_Get_Size(buffer);
        OutStream_Write_Bytes(dat_out, buf, size);

        // Write file pointer.
        OutStream_Write_I64(ix_out, start);
    }

    DECREF(buffer);
}
コード例 #20
0
ファイル: Err.c プロジェクト: apache/lucy-clownfish
/* Throw an error reporting that abstract method `method_name` has no
 * implementation.  The class named in the message is that of `obj` when
 * `obj` is non-NULL, and `klass` otherwise.
 */
void
Err_abstract_method_call(Obj *obj, Class *klass, const char *method_name) {
    String *class_name;
    if (obj) {
        class_name = Obj_get_class_name(obj);
    }
    else {
        class_name = Class_Get_Name(klass);
    }
    THROW(ERR, "Abstract method '%s' not defined by %o", method_name,
          class_name);
}
コード例 #21
0
ファイル: BackgroundMerger.c プロジェクト: kidaa/lucy
/* Carry forward deletions that were applied to merged-away segments after
 * this merger opened its own PolyReader.
 *
 * Segment readers from a freshly opened PolyReader are compared against the
 * ones held in ivars->polyreader.  For each old segment that has an entry in
 * ivars->doc_maps and whose deletion count differs, an iterator over the
 * new deletions is collected.  If any were collected, a new segment is
 * written in which each such deletion is remapped through the segment's doc
 * map and applied against the rewritten segment.
 *
 * Returns false if no updated deletions were found, true otherwise.
 * Throws if the segment numbered like ivars->segment cannot be located among
 * the merge PolyReader's segment readers.
 */
static bool
S_merge_updated_deletions(BackgroundMerger *self) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);
    // Created lazily; stays NULL when there is nothing to carry forward.
    Hash *updated_deletions = NULL;

    PolyReader *new_polyreader
        = PolyReader_open((Obj*)ivars->folder, NULL, NULL);
    Vector *new_seg_readers
        = PolyReader_Get_Seg_Readers(new_polyreader);
    Vector *old_seg_readers
        = PolyReader_Get_Seg_Readers(ivars->polyreader);
    Hash *new_segs = Hash_new(Vec_Get_Size(new_seg_readers));

    // Index the current seg readers by segment name.
    for (uint32_t i = 0, max = Vec_Get_Size(new_seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(new_seg_readers, i);
        String    *seg_name   = SegReader_Get_Seg_Name(seg_reader);
        Hash_Store(new_segs, seg_name, INCREF(seg_reader));
    }

    // Compare each old seg reader with its current counterpart.
    for (uint32_t i = 0, max = Vec_Get_Size(old_seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(old_seg_readers, i);
        String    *seg_name   = SegReader_Get_Seg_Name(seg_reader);

        // If this segment was merged away...
        if (Hash_Fetch(ivars->doc_maps, seg_name)) {
            SegReader *new_seg_reader
                = (SegReader*)CERTIFY(
                      Hash_Fetch(new_segs, seg_name),
                      SEGREADER);
            int32_t old_del_count = SegReader_Del_Count(seg_reader);
            int32_t new_del_count = SegReader_Del_Count(new_seg_reader);
            // ... were any new deletions applied against it?
            if (old_del_count != new_del_count) {
                DeletionsReader *del_reader
                    = (DeletionsReader*)SegReader_Obtain(
                          new_seg_reader,
                          Class_Get_Name(DELETIONSREADER));
                if (!updated_deletions) {
                    updated_deletions = Hash_new(max);
                }
                // Keep an iterator over the segment's current deletions,
                // keyed by segment name.
                Hash_Store(updated_deletions, seg_name,
                           (Obj*)DelReader_Iterator(del_reader));
            }
        }
    }

    DECREF(new_polyreader);
    DECREF(new_segs);

    if (!updated_deletions) {
        return false;
    }
    else {
        // Open a reader on our own snapshot and prepare a new segment whose
        // number is one past the highest in the latest snapshot on disk.
        PolyReader *merge_polyreader
            = PolyReader_open((Obj*)ivars->folder, ivars->snapshot, NULL);
        Vector *merge_seg_readers
            = PolyReader_Get_Seg_Readers(merge_polyreader);
        Snapshot *latest_snapshot
            = Snapshot_Read_File(Snapshot_new(), ivars->folder, NULL);
        int64_t new_seg_num
            = IxManager_Highest_Seg_Num(ivars->manager, latest_snapshot) + 1;
        Segment   *new_segment = Seg_new(new_seg_num);
        SegWriter *seg_writer  = SegWriter_new(ivars->schema, ivars->snapshot,
                                               new_segment, merge_polyreader);
        DeletionsWriter *del_writer = SegWriter_Get_Del_Writer(seg_writer);
        int64_t  merge_seg_num = Seg_Get_Number(ivars->segment);
        // INT32_MAX serves as the "not found" sentinel for both values.
        uint32_t seg_tick      = INT32_MAX;
        int32_t  offset        = INT32_MAX;

        SegWriter_Prep_Seg_Dir(seg_writer);

        // Locate the segment with our segment's number and look up its doc
        // id offset within the merge PolyReader.
        for (uint32_t i = 0, max = Vec_Get_Size(merge_seg_readers); i < max; i++) {
            SegReader *seg_reader
                = (SegReader*)Vec_Fetch(merge_seg_readers, i);
            if (SegReader_Get_Seg_Num(seg_reader) == merge_seg_num) {
                I32Array *offsets = PolyReader_Offsets(merge_polyreader);
                seg_tick = i;
                offset = I32Arr_Get(offsets, seg_tick);
                DECREF(offsets);
            }
        }
        if (offset == INT32_MAX) { THROW(ERR, "Failed sanity check"); }

        // Replay each collected deletion against the rewritten segment.
        HashIterator *iter = HashIter_new(updated_deletions);
        while (HashIter_Next(iter)) {
            String  *seg_name  = HashIter_Get_Key(iter);
            Matcher *deletions = (Matcher*)HashIter_Get_Value(iter);

            I32Array *doc_map
                = (I32Array*)CERTIFY(
                      Hash_Fetch(ivars->doc_maps, seg_name),
                      I32ARRAY);
            int32_t del;
            while (0 != (del = Matcher_Next(deletions))) {
                // Find the slot where the deleted doc resides in the
                // rewritten segment. If the doc was already deleted when we
                // were merging, do nothing.
                int32_t remapped = I32Arr_Get(doc_map, del);
                if (remapped) {
                    // It's a new deletion, so carry it forward and zap it in
                    // the rewritten segment.
                    DelWriter_Delete_By_Doc_ID(del_writer, remapped + offset);
                }
            }
        }
        DECREF(iter);

        // Finish the segment and clean up.
        DelWriter_Finish(del_writer);
        SegWriter_Finish(seg_writer);
        DECREF(seg_writer);
        DECREF(new_segment);
        DECREF(latest_snapshot);
        DECREF(merge_polyreader);
        DECREF(updated_deletions);
    }

    return true;
}
コード例 #22
0
ファイル: LexIndex.c プロジェクト: kidaa/lucy
/* Position the index at the entry for `target` using a binary search.
 *
 * On an exact match the tick is set to the matching entry.  Otherwise it is
 * set to the search's final upper bound, or to 0 when that bound is -1
 * (target sorts before the first entry).  A NULL target or an empty index
 * sets the tick to 0 and returns without reading an entry.
 *
 * Throws if `target` is not a String.
 */
void
LexIndex_Seek_IMP(LexIndex *self, Obj *target) {
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    TermStepper *term_stepper = ivars->term_stepper;
    InStream    *ix_in        = ivars->ix_in;
    FieldType   *type         = ivars->field_type;

    // Nothing to search for, or nothing to search in.
    if (target == NULL || ivars->size == 0) {
        ivars->tick = 0;
        return;
    }

    if (!Obj_is_a(target, STRING)) {
        THROW(ERR, "Target is a %o, and not comparable to a %o",
              Obj_get_class_name(target), Class_Get_Name(STRING));
    }
    /* TODO:
    Obj *first_obj = Vec_Fetch(terms, 0);
    if (!Obj_is_a(target, Obj_get_class(first_obj))) {
        THROW(ERR, "Target is a %o, and not comparable to a %o",
            Obj_get_class_name(target), Obj_get_class_name(first_obj));
    }
    */

    // Divide and conquer.
    int32_t lo    = 0;
    int32_t hi    = ivars->size - 1;
    int32_t match = -1; // stays negative unless an exact match is found
    while (lo <= hi) {
        const int32_t mid = lo + ((hi - lo) / 2);
        const int64_t offset
            = (int64_t)NumUtil_decode_bigend_u64(ivars->offsets + mid);
        InStream_Seek(ix_in, offset);
        TermStepper_Read_Key_Frame(term_stepper, ix_in);

        // Compare values.  There is no need for a NULL-check because the term
        // number is always between 0 and ivars->size - 1.
        Obj *value = TermStepper_Get_Value(term_stepper);
        const int32_t comparison = FType_Compare_Values(type, target, value);

        if (comparison == 0) {
            match = mid;
            break;
        }
        else if (comparison < 0) {
            hi = mid - 1;
        }
        else {
            lo = mid + 1;
        }
    }

    // Record the index of the entry we've seeked to, then read entry.
    if (hi == -1) {
        // Target is less than the first entry.
        ivars->tick = 0;
    }
    else if (match < 0) {
        // No exact match: settle on the final upper bound.
        ivars->tick = hi;
    }
    else {
        ivars->tick = match;
    }
    S_read_entry(self);
}