コード例 #1
0
ファイル: Indexer.c プロジェクト: gitpan/KinoSearch
void
Indexer_add_index(Indexer *self, Obj *index)
{
    Folder *other_folder = NULL;
    IndexReader *reader  = NULL;
    
    if (Obj_Is_A(index, FOLDER)) {
        other_folder = (Folder*)INCREF(index);
    }
    else if (Obj_Is_A(index, CHARBUF)) {
        other_folder = (Folder*)FSFolder_new((CharBuf*)index);
    }
    else {
        THROW(ERR, "Invalid type for 'index': %o", Obj_Get_Class_Name(index));
    }

    reader = IxReader_open((Obj*)other_folder, NULL, NULL);
    if (reader == NULL) {
        THROW(ERR, "Index doesn't seem to contain any data");
    }
    else {
        Schema *schema       = self->schema;
        Schema *other_schema = IxReader_Get_Schema(reader);
        VArray *other_fields = Schema_All_Fields(other_schema);
        VArray *seg_readers  = IxReader_Seg_Readers(reader);
        uint32_t i, max;

        // Validate schema compatibility and add fields. 
        Schema_Eat(schema, other_schema);

        // Add fields to Segment. 
        for (i = 0, max = VA_Get_Size(other_fields); i < max; i++) {
            CharBuf *other_field = (CharBuf*)VA_Fetch(other_fields, i);
            Seg_Add_Field(self->segment, other_field);
        }
        DECREF(other_fields);

        // Add all segments. 
        for (i = 0, max = VA_Get_Size(seg_readers); i < max; i++) {
            SegReader *seg_reader = (SegReader*)VA_Fetch(seg_readers, i);
            DeletionsReader *del_reader = (DeletionsReader*)SegReader_Fetch(
                seg_reader, VTable_Get_Name(DELETIONSREADER));
            Matcher *deletions = del_reader 
                               ? DelReader_Iterator(del_reader) 
                               : NULL;
            I32Array *doc_map = DelWriter_Generate_Doc_Map(self->del_writer,
                deletions, SegReader_Doc_Max(seg_reader),
                (int32_t)Seg_Get_Count(self->segment) 
            );
            SegWriter_Add_Segment(self->seg_writer, seg_reader, doc_map);
            DECREF(deletions);
            DECREF(doc_map);
        }
        DECREF(seg_readers);
    }

    DECREF(reader);
    DECREF(other_folder);
}
コード例 #2
0
ファイル: Indexer.c プロジェクト: rectang/lucy
void
Indexer_Add_Index_IMP(Indexer *self, Obj *index) {
    IndexerIVARS *const ivars = Indexer_IVARS(self);
    Folder *other_folder = NULL;
    IndexReader *reader  = NULL;

    if (Obj_is_a(index, FOLDER)) {
        other_folder = (Folder*)INCREF(index);
    }
    else if (Obj_is_a(index, STRING)) {
        other_folder = (Folder*)FSFolder_new((String*)index);
    }
    else {
        THROW(ERR, "Invalid type for 'index': %o", Obj_get_class_name(index));
    }

    reader = IxReader_open((Obj*)other_folder, NULL, NULL);
    if (reader == NULL) {
        THROW(ERR, "Index doesn't seem to contain any data");
    }
    else {
        Schema *schema       = ivars->schema;
        Schema *other_schema = IxReader_Get_Schema(reader);
        Vector *other_fields = Schema_All_Fields(other_schema);
        Vector *seg_readers  = IxReader_Seg_Readers(reader);

        // Validate schema compatibility and add fields.
        Schema_Eat(schema, other_schema);

        // Add fields to Segment.
        for (size_t i = 0, max = Vec_Get_Size(other_fields); i < max; i++) {
            String *other_field = (String*)Vec_Fetch(other_fields, i);
            Seg_Add_Field(ivars->segment, other_field);
        }
        DECREF(other_fields);

        // Add all segments.
        for (size_t i = 0, max = Vec_Get_Size(seg_readers); i < max; i++) {
            SegReader *seg_reader = (SegReader*)Vec_Fetch(seg_readers, i);
            DeletionsReader *del_reader
                = (DeletionsReader*)SegReader_Fetch(
                      seg_reader, Class_Get_Name(DELETIONSREADER));
            Matcher *deletions = del_reader
                                 ? DelReader_Iterator(del_reader)
                                 : NULL;
            I32Array *doc_map = DelWriter_Generate_Doc_Map(
                                    ivars->del_writer, deletions,
                                    SegReader_Doc_Max(seg_reader),
                                    (int32_t)Seg_Get_Count(ivars->segment));
            SegWriter_Add_Segment(ivars->seg_writer, seg_reader, doc_map);
            DECREF(deletions);
            DECREF(doc_map);
        }
        DECREF(seg_readers);
    }

    DECREF(reader);
    DECREF(other_folder);
}
コード例 #3
0
ファイル: BackgroundMerger.c プロジェクト: kidaa/lucy
static uint32_t
S_maybe_merge(BackgroundMerger *self) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);
    Vector *to_merge = IxManager_Recycle(ivars->manager, ivars->polyreader,
                                         ivars->del_writer, 0, ivars->optimize);
    int32_t num_to_merge = Vec_Get_Size(to_merge);

    // There's no point in merging one segment if it has no deletions, because
    // we'd just be rewriting it. */
    if (num_to_merge == 1) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(to_merge, 0);
        if (!SegReader_Del_Count(seg_reader)) {
            DECREF(to_merge);
            return 0;
        }
    }
    else if (num_to_merge == 0) {
        DECREF(to_merge);
        return 0;
    }

    // Now that we're sure we're writing a new segment, prep the seg dir.
    SegWriter_Prep_Seg_Dir(ivars->seg_writer);

    // Consolidate segments.
    for (uint32_t i = 0, max = num_to_merge; i < max; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(to_merge, i);
        String    *seg_name   = SegReader_Get_Seg_Name(seg_reader);
        int64_t    doc_count  = Seg_Get_Count(ivars->segment);
        Matcher *deletions
            = DelWriter_Seg_Deletions(ivars->del_writer, seg_reader);
        I32Array *doc_map = DelWriter_Generate_Doc_Map(
                                ivars->del_writer, deletions,
                                SegReader_Doc_Max(seg_reader),
                                (int32_t)doc_count);

        Hash_Store(ivars->doc_maps, seg_name, (Obj*)doc_map);
        SegWriter_Merge_Segment(ivars->seg_writer, seg_reader, doc_map);
        DECREF(deletions);
    }

    DECREF(to_merge);
    return num_to_merge;
}
コード例 #4
0
ファイル: Indexer.c プロジェクト: gitpan/KinoSearch
static bool_t
S_maybe_merge(Indexer *self, VArray *seg_readers)
{
    bool_t    merge_happened  = false;
    uint32_t  num_seg_readers = VA_Get_Size(seg_readers);
    Lock     *merge_lock      = IxManager_Make_Merge_Lock(self->manager);
    bool_t    got_merge_lock  = Lock_Obtain(merge_lock);
    int64_t   cutoff;
    VArray   *to_merge;
    uint32_t  i, max;

    if (got_merge_lock) {
        self->merge_lock = merge_lock;
        cutoff = 0;
    }
    else {
        // If something else holds the merge lock, don't interfere. 
        Hash *merge_data = IxManager_Read_Merge_Data(self->manager);
        if (merge_data) {
            Obj *cutoff_obj = Hash_Fetch_Str(merge_data, "cutoff", 6);
            if (cutoff_obj) {
                cutoff = Obj_To_I64(cutoff_obj);
            }
            else {
                cutoff = I64_MAX;
            }
            DECREF(merge_data);
        }
        else {
            cutoff = I64_MAX;
        }
        DECREF(merge_lock);
    }

    // Get a list of segments to recycle.  Validate and confirm that there are
    // no dupes in the list.
    to_merge = IxManager_Recycle(self->manager, self->polyreader, 
        self->del_writer, cutoff, self->optimize);
    {
        Hash *seen = Hash_new(VA_Get_Size(to_merge));
        for (i = 0, max = VA_Get_Size(to_merge); i < max; i++) {
            SegReader *seg_reader = (SegReader*)CERTIFY(
                VA_Fetch(to_merge, i), SEGREADER);
            CharBuf *seg_name = SegReader_Get_Seg_Name(seg_reader);
            if (Hash_Fetch(seen, (Obj*)seg_name)) {
                DECREF(seen);
                DECREF(to_merge);
                THROW(ERR, "Recycle() tried to merge segment '%o' twice",
                    seg_name);
            }
            Hash_Store(seen, (Obj*)seg_name, INCREF(&EMPTY));
        }
        DECREF(seen);
    }

    // Consolidate segments if either sparse or optimizing forced. 
    for (i = 0, max = VA_Get_Size(to_merge); i < max; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(to_merge, i);
        int64_t seg_num = SegReader_Get_Seg_Num(seg_reader);
        Matcher *deletions 
            = DelWriter_Seg_Deletions(self->del_writer, seg_reader);
        I32Array *doc_map = DelWriter_Generate_Doc_Map(self->del_writer,
            deletions, SegReader_Doc_Max(seg_reader),
            (int32_t)Seg_Get_Count(self->segment) 
        );
        if (seg_num <= cutoff) {
            THROW(ERR, "Segment %o violates cutoff (%i64 <= %i64)",
                SegReader_Get_Seg_Name(seg_reader), seg_num, cutoff);
        }
        SegWriter_Merge_Segment(self->seg_writer, seg_reader, doc_map);
        merge_happened = true;
        DECREF(deletions);
        DECREF(doc_map);
    }

    // Write out new deletions. 
    if (DelWriter_Updated(self->del_writer)) {
        // Only write out if they haven't all been applied. 
        if (VA_Get_Size(to_merge) != num_seg_readers) {
            DelWriter_Finish(self->del_writer);
        }
    }

    DECREF(to_merge);
    return merge_happened;
}