コード例 #1
0
ファイル: BackgroundMerger.c プロジェクト: kidaa/lucy
BackgroundMerger*
BGMerger_init(BackgroundMerger *self, Obj *index, IndexManager *manager) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);
    Folder *folder = S_init_folder(index);

    // Init.
    ivars->optimize      = false;
    ivars->prepared      = false;
    ivars->needs_commit  = false;
    ivars->snapfile      = NULL;
    ivars->doc_maps      = Hash_new(0);

    // Assign.
    ivars->folder = folder;
    if (manager) {
        ivars->manager = (IndexManager*)INCREF(manager);
    }
    else {
        ivars->manager = IxManager_new(NULL, NULL);
        IxManager_Set_Write_Lock_Timeout(ivars->manager, 10000);
    }
    IxManager_Set_Folder(ivars->manager, folder);

    // Obtain write lock (which we'll only hold briefly), then merge lock.
    S_obtain_write_lock(self);
    if (!ivars->write_lock) {
        DECREF(self);
        RETHROW(INCREF(Err_get_error()));
    }
    S_obtain_merge_lock(self);
    if (!ivars->merge_lock) {
        DECREF(self);
        RETHROW(INCREF(Err_get_error()));
    }

    // Find the latest snapshot.  If there's no index content, bail early.
    ivars->snapshot = Snapshot_Read_File(Snapshot_new(), folder, NULL);
    if (!Snapshot_Get_Path(ivars->snapshot)) {
        S_release_write_lock(self);
        S_release_merge_lock(self);
        return self;
    }

    // Create FilePurger. Zap detritus from previous sessions.
    ivars->file_purger = FilePurger_new(folder, ivars->snapshot, ivars->manager);
    FilePurger_Purge(ivars->file_purger);

    // Open a PolyReader, passing in the IndexManager so we get a read lock on
    // the Snapshot's files -- so that Indexers don't zap our files while
    // we're operating in the background.
    ivars->polyreader = PolyReader_open((Obj*)folder, NULL, ivars->manager);

    // Clone the PolyReader's schema.
    Obj *dump = (Obj*)Schema_Dump(PolyReader_Get_Schema(ivars->polyreader));
    ivars->schema = (Schema*)CERTIFY(Freezer_load(dump), SCHEMA);
    DECREF(dump);

    // Create new Segment.
    int64_t new_seg_num
        = IxManager_Highest_Seg_Num(ivars->manager, ivars->snapshot) + 1;
    Vector *fields = Schema_All_Fields(ivars->schema);
    ivars->segment = Seg_new(new_seg_num);
    for (uint32_t i = 0, max = Vec_Get_Size(fields); i < max; i++) {
        Seg_Add_Field(ivars->segment, (String*)Vec_Fetch(fields, i));
    }
    DECREF(fields);

    // Our "cutoff" is the segment this BackgroundMerger will write.  Now that
    // we've determined the cutoff, write the merge data file.
    ivars->cutoff = Seg_Get_Number(ivars->segment);
    IxManager_Write_Merge_Data(ivars->manager, ivars->cutoff);

    /* Create the SegWriter but hold off on preparing the new segment
     * directory -- because if we don't need to merge any segments we don't
     * need it.  (We've reserved the dir by plopping down the merge.json
     * file.) */
    ivars->seg_writer = SegWriter_new(ivars->schema, ivars->snapshot,
                                      ivars->segment, ivars->polyreader);

    // Grab a local ref to the DeletionsWriter.
    ivars->del_writer
        = (DeletionsWriter*)INCREF(SegWriter_Get_Del_Writer(ivars->seg_writer));

    // Release the write lock.  Now new Indexers can start while we work in
    // the background.
    S_release_write_lock(self);

    return self;
}
コード例 #2
0
ファイル: PolyReader.c プロジェクト: hernan604/lucy
void
S_try_open_elements(void *context) {
    struct try_open_elements_context *args
        = (struct try_open_elements_context*)context;
    PolyReader *self              = args->self;
    PolyReaderIVARS *const ivars  = PolyReader_IVARS(self);
    VArray     *files             = Snapshot_List(ivars->snapshot);
    Folder     *folder            = PolyReader_Get_Folder(self);
    uint32_t    num_segs          = 0;
    uint64_t    latest_schema_gen = 0;
    String     *schema_file       = NULL;

    // Find schema file, count segments.
    for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
        String *entry = (String*)VA_Fetch(files, i);

        if (Seg_valid_seg_name(entry)) {
            num_segs++;
        }
        else if (Str_Starts_With_Utf8(entry, "schema_", 7)
                 && Str_Ends_With_Utf8(entry, ".json", 5)
                ) {
            uint64_t gen = IxFileNames_extract_gen(entry);
            if (gen > latest_schema_gen) {
                latest_schema_gen = gen;
                schema_file       = entry;
            }
        }
    }

    // Read Schema.
    if (!schema_file) {
        DECREF(files);
        THROW(ERR, "Can't find a schema file.");
    }
    else {
        Obj *dump = Json_slurp_json(folder, schema_file);
        if (dump) { // read file successfully
            DECREF(ivars->schema);
            ivars->schema = (Schema*)CERTIFY(Freezer_load(dump), SCHEMA);
            DECREF(dump);
            schema_file = NULL;
        }
        else {
            String *mess = MAKE_MESS("Failed to parse %o", schema_file);
            DECREF(files);
            Err_throw_mess(ERR, mess);
        }
    }

    VArray *segments = VA_new(num_segs);
    for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
        String *entry = (String*)VA_Fetch(files, i);

        // Create a Segment for each segmeta.
        if (Seg_valid_seg_name(entry)) {
            int64_t seg_num = IxFileNames_extract_gen(entry);
            Segment *segment = Seg_new(seg_num);

            // Bail if reading the file fails (probably because it's been
            // deleted and a new snapshot file has been written so we need to
            // retry).
            if (Seg_Read_File(segment, folder)) {
                VA_Push(segments, (Obj*)segment);
            }
            else {
                String *mess = MAKE_MESS("Failed to read %o", entry);
                DECREF(segment);
                DECREF(segments);
                DECREF(files);
                Err_throw_mess(ERR, mess);
            }
        }
    }

    // Sort the segments by age.
    VA_Sort(segments, NULL, NULL);

    // Open individual SegReaders.
    struct try_open_segreader_context seg_context;
    seg_context.schema   = PolyReader_Get_Schema(self);
    seg_context.folder   = folder;
    seg_context.snapshot = PolyReader_Get_Snapshot(self);
    seg_context.segments = segments;
    seg_context.result   = NULL;
    args->seg_readers = VA_new(num_segs);
    Err *error = NULL;
    for (uint32_t seg_tick = 0; seg_tick < num_segs; seg_tick++) {
        seg_context.seg_tick = seg_tick;
        error = Err_trap(S_try_open_segreader, &seg_context);
        if (error) {
            break;
        }
        VA_Push(args->seg_readers, (Obj*)seg_context.result);
        seg_context.result = NULL;
    }

    DECREF(segments);
    DECREF(files);
    if (error) {
        DECREF(args->seg_readers);
        args->seg_readers = NULL;
        RETHROW(error);
    }
}
コード例 #3
0
ファイル: BackgroundMerger.c プロジェクト: kidaa/lucy
static bool
S_merge_updated_deletions(BackgroundMerger *self) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);
    Hash *updated_deletions = NULL;

    PolyReader *new_polyreader
        = PolyReader_open((Obj*)ivars->folder, NULL, NULL);
    Vector *new_seg_readers
        = PolyReader_Get_Seg_Readers(new_polyreader);
    Vector *old_seg_readers
        = PolyReader_Get_Seg_Readers(ivars->polyreader);
    Hash *new_segs = Hash_new(Vec_Get_Size(new_seg_readers));

    for (uint32_t i = 0, max = Vec_Get_Size(new_seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(new_seg_readers, i);
        String    *seg_name   = SegReader_Get_Seg_Name(seg_reader);
        Hash_Store(new_segs, seg_name, INCREF(seg_reader));
    }

    for (uint32_t i = 0, max = Vec_Get_Size(old_seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(old_seg_readers, i);
        String    *seg_name   = SegReader_Get_Seg_Name(seg_reader);

        // If this segment was merged away...
        if (Hash_Fetch(ivars->doc_maps, seg_name)) {
            SegReader *new_seg_reader
                = (SegReader*)CERTIFY(
                      Hash_Fetch(new_segs, seg_name),
                      SEGREADER);
            int32_t old_del_count = SegReader_Del_Count(seg_reader);
            int32_t new_del_count = SegReader_Del_Count(new_seg_reader);
            // ... were any new deletions applied against it?
            if (old_del_count != new_del_count) {
                DeletionsReader *del_reader
                    = (DeletionsReader*)SegReader_Obtain(
                          new_seg_reader,
                          Class_Get_Name(DELETIONSREADER));
                if (!updated_deletions) {
                    updated_deletions = Hash_new(max);
                }
                Hash_Store(updated_deletions, seg_name,
                           (Obj*)DelReader_Iterator(del_reader));
            }
        }
    }

    DECREF(new_polyreader);
    DECREF(new_segs);

    if (!updated_deletions) {
        return false;
    }
    else {
        PolyReader *merge_polyreader
            = PolyReader_open((Obj*)ivars->folder, ivars->snapshot, NULL);
        Vector *merge_seg_readers
            = PolyReader_Get_Seg_Readers(merge_polyreader);
        Snapshot *latest_snapshot
            = Snapshot_Read_File(Snapshot_new(), ivars->folder, NULL);
        int64_t new_seg_num
            = IxManager_Highest_Seg_Num(ivars->manager, latest_snapshot) + 1;
        Segment   *new_segment = Seg_new(new_seg_num);
        SegWriter *seg_writer  = SegWriter_new(ivars->schema, ivars->snapshot,
                                               new_segment, merge_polyreader);
        DeletionsWriter *del_writer = SegWriter_Get_Del_Writer(seg_writer);
        int64_t  merge_seg_num = Seg_Get_Number(ivars->segment);
        uint32_t seg_tick      = INT32_MAX;
        int32_t  offset        = INT32_MAX;

        SegWriter_Prep_Seg_Dir(seg_writer);

        for (uint32_t i = 0, max = Vec_Get_Size(merge_seg_readers); i < max; i++) {
            SegReader *seg_reader
                = (SegReader*)Vec_Fetch(merge_seg_readers, i);
            if (SegReader_Get_Seg_Num(seg_reader) == merge_seg_num) {
                I32Array *offsets = PolyReader_Offsets(merge_polyreader);
                seg_tick = i;
                offset = I32Arr_Get(offsets, seg_tick);
                DECREF(offsets);
            }
        }
        if (offset == INT32_MAX) { THROW(ERR, "Failed sanity check"); }

        HashIterator *iter = HashIter_new(updated_deletions);
        while (HashIter_Next(iter)) {
            String  *seg_name  = HashIter_Get_Key(iter);
            Matcher *deletions = (Matcher*)HashIter_Get_Value(iter);

            I32Array *doc_map
                = (I32Array*)CERTIFY(
                      Hash_Fetch(ivars->doc_maps, seg_name),
                      I32ARRAY);
            int32_t del;
            while (0 != (del = Matcher_Next(deletions))) {
                // Find the slot where the deleted doc resides in the
                // rewritten segment. If the doc was already deleted when we
                // were merging, do nothing.
                int32_t remapped = I32Arr_Get(doc_map, del);
                if (remapped) {
                    // It's a new deletion, so carry it forward and zap it in
                    // the rewritten segment.
                    DelWriter_Delete_By_Doc_ID(del_writer, remapped + offset);
                }
            }
        }
        DECREF(iter);

        // Finish the segment and clean up.
        DelWriter_Finish(del_writer);
        SegWriter_Finish(seg_writer);
        DECREF(seg_writer);
        DECREF(new_segment);
        DECREF(latest_snapshot);
        DECREF(merge_polyreader);
        DECREF(updated_deletions);
    }

    return true;
}
コード例 #4
0
ファイル: Indexer.c プロジェクト: rectang/lucy
Indexer*
Indexer_init(Indexer *self, Schema *schema, Obj *index,
             IndexManager *manager, int32_t flags) {
    IndexerIVARS *const ivars = Indexer_IVARS(self);
    bool      create   = (flags & Indexer_CREATE)   ? true : false;
    bool      truncate = (flags & Indexer_TRUNCATE) ? true : false;
    Folder   *folder   = S_init_folder(index, create);
    Snapshot *latest_snapshot = Snapshot_new();

    // Init.
    ivars->stock_doc     = Doc_new(NULL, 0);
    ivars->truncate      = false;
    ivars->optimize      = false;
    ivars->prepared      = false;
    ivars->needs_commit  = false;
    ivars->snapfile      = NULL;
    ivars->merge_lock    = NULL;

    // Assign.
    ivars->folder       = folder;
    ivars->manager      = manager
                         ? (IndexManager*)INCREF(manager)
                         : IxManager_new(NULL, NULL);
    IxManager_Set_Folder(ivars->manager, folder);

    // Get a write lock for this folder.
    Lock *write_lock = IxManager_Make_Write_Lock(ivars->manager);
    Lock_Clear_Stale(write_lock);
    if (Lock_Obtain(write_lock)) {
        // Only assign if successful, otherwise DESTROY unlocks -- bad!
        ivars->write_lock = write_lock;
    }
    else {
        DECREF(write_lock);
        DECREF(self);
        RETHROW(INCREF(Err_get_error()));
    }

    // Find the latest snapshot or create a new one.
    String *latest_snapfile = IxFileNames_latest_snapshot(folder);
    if (latest_snapfile) {
        Snapshot_Read_File(latest_snapshot, folder, latest_snapfile);
    }

    // Look for an existing Schema if one wasn't supplied.
    if (schema) {
        ivars->schema = (Schema*)INCREF(schema);
    }
    else {
        if (!latest_snapfile) {
            S_release_write_lock(self);
            THROW(ERR, "No Schema supplied, and can't find one in the index");
        }
        else {
            String *schema_file = S_find_schema_file(latest_snapshot);
            Obj *dump = Json_slurp_json(folder, schema_file);
            if (dump) { // read file successfully
                ivars->schema = (Schema*)CERTIFY(Freezer_load(dump), SCHEMA);
                schema = ivars->schema;
                DECREF(dump);
                schema_file = NULL;
            }
            else {
                THROW(ERR, "Failed to parse %o", schema_file);
            }
        }
    }

    // If we're clobbering, start with an empty Snapshot and an empty
    // PolyReader.  Otherwise, start with the most recent Snapshot and an
    // up-to-date PolyReader.
    if (truncate) {
        ivars->snapshot = Snapshot_new();
        ivars->polyreader = PolyReader_new(schema, folder, NULL, NULL, NULL);
        ivars->truncate = true;
    }
    else {
        // TODO: clone most recent snapshot rather than read it twice.
        ivars->snapshot = (Snapshot*)INCREF(latest_snapshot);
        ivars->polyreader = latest_snapfile
                           ? PolyReader_open((Obj*)folder, NULL, NULL)
                           : PolyReader_new(schema, folder, NULL, NULL, NULL);

        if (latest_snapfile) {
            // Make sure than any existing fields which may have been
            // dynamically added during past indexing sessions get added.
            Schema *old_schema = PolyReader_Get_Schema(ivars->polyreader);
            Schema_Eat(schema, old_schema);
        }
    }

    // Zap detritus from previous sessions.
    // Note: we have to feed FilePurger with the most recent snapshot file
    // now, but with the Indexer's snapshot later.
    FilePurger *file_purger
        = FilePurger_new(folder, latest_snapshot, ivars->manager);
    FilePurger_Purge(file_purger);
    DECREF(file_purger);

    // Create a new segment.
    int64_t new_seg_num
        = IxManager_Highest_Seg_Num(ivars->manager, latest_snapshot) + 1;
    Lock *merge_lock = IxManager_Make_Merge_Lock(ivars->manager);
    if (Lock_Is_Locked(merge_lock)) {
        // If there's a background merge process going on, stay out of its
        // way.
        Hash *merge_data = IxManager_Read_Merge_Data(ivars->manager);
        Obj *cutoff_obj = merge_data
                          ? Hash_Fetch_Utf8(merge_data, "cutoff", 6)
                          : NULL;
        if (!cutoff_obj) {
            DECREF(merge_lock);
            DECREF(merge_data);
            THROW(ERR, "Background merge detected, but can't read merge data");
        }
        else {
            int64_t cutoff = Json_obj_to_i64(cutoff_obj);
            if (cutoff >= new_seg_num) {
                new_seg_num = cutoff + 1;
            }
        }
        DECREF(merge_data);
    }
    ivars->segment = Seg_new(new_seg_num);

    // Add all known fields to Segment.
    Vector *fields = Schema_All_Fields(schema);
    for (size_t i = 0, max = Vec_Get_Size(fields); i < max; i++) {
        Seg_Add_Field(ivars->segment, (String*)Vec_Fetch(fields, i));
    }
    DECREF(fields);

    DECREF(merge_lock);

    // Create new SegWriter and FilePurger.
    ivars->file_purger
        = FilePurger_new(folder, ivars->snapshot, ivars->manager);
    ivars->seg_writer = SegWriter_new(ivars->schema, ivars->snapshot,
                                     ivars->segment, ivars->polyreader);
    SegWriter_Prep_Seg_Dir(ivars->seg_writer);

    // Grab a local ref to the DeletionsWriter.
    ivars->del_writer = (DeletionsWriter*)INCREF(
                           SegWriter_Get_Del_Writer(ivars->seg_writer));

    DECREF(latest_snapfile);
    DECREF(latest_snapshot);

    return self;
}