Пример #1
0
Hash*
DefDelWriter_Metadata_IMP(DefaultDeletionsWriter *self) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    DefDelWriter_Metadata_t super_meta
        = (DefDelWriter_Metadata_t)SUPER_METHOD_PTR(DEFAULTDELETIONSWRITER,
                                                    LUCY_DefDelWriter_Metadata);
    Hash    *const metadata = super_meta(self);
    Hash    *const files    = Hash_new(0);

    for (uint32_t i = 0, max = VA_Get_Size(ivars->seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(ivars->seg_readers, i);
        if (ivars->updated[i]) {
            BitVector *deldocs   = (BitVector*)VA_Fetch(ivars->bit_vecs, i);
            Segment   *segment   = SegReader_Get_Segment(seg_reader);
            Hash      *mini_meta = Hash_new(2);
            Hash_Store_Utf8(mini_meta, "count", 5,
                            (Obj*)Str_newf("%u32", (uint32_t)BitVec_Count(deldocs)));
            Hash_Store_Utf8(mini_meta, "filename", 8,
                            (Obj*)S_del_filename(self, seg_reader));
            Hash_Store(files, (Obj*)Seg_Get_Name(segment), (Obj*)mini_meta);
        }
    }
    Hash_Store_Utf8(metadata, "files", 5, (Obj*)files);

    return metadata;
}
Пример #2
0
SegReader*
SegReader_init(SegReader *self, Schema *schema, Folder *folder,
               Snapshot *snapshot, Vector *segments, int32_t seg_tick) {
    Segment *segment;

    IxReader_init((IndexReader*)self, schema, folder, snapshot, segments,
                  seg_tick, NULL);
    SegReaderIVARS *const ivars = SegReader_IVARS(self);
    segment = SegReader_Get_Segment(self);

    ivars->doc_max    = (int32_t)Seg_Get_Count(segment);
    ivars->seg_name   = (String*)INCREF(Seg_Get_Name(segment));
    ivars->seg_num    = Seg_Get_Number(segment);
    Err *error = Err_trap(S_try_init_components, self);
    if (error) {
        // An error occurred, so clean up self and rethrow the exception.
        DECREF(self);
        RETHROW(error);
    }

    DeletionsReader *del_reader
        = (DeletionsReader*)Hash_Fetch(
              ivars->components, Class_Get_Name(DELETIONSREADER));
    ivars->del_count = del_reader ? DelReader_Del_Count(del_reader) : 0;

    return self;
}
Пример #3
0
Matcher*
DefDelWriter_Seg_Deletions_IMP(DefaultDeletionsWriter *self,
                               SegReader *seg_reader) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Matcher *deletions    = NULL;
    Segment *segment      = SegReader_Get_Segment(seg_reader);
    String  *seg_name     = Seg_Get_Name(segment);
    Integer32 *tick_obj   = (Integer32*)Hash_Fetch(ivars->name_to_tick,
                                                   (Obj*)seg_name);
    int32_t tick          = tick_obj ? Int32_Get_Value(tick_obj) : 0;
    SegReader *candidate  = tick_obj
                            ? (SegReader*)VA_Fetch(ivars->seg_readers, tick)
                            : NULL;

    if (tick_obj) {
        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Obtain(
                  candidate, Class_Get_Name(DELETIONSREADER));
        if (ivars->updated[tick] || DelReader_Del_Count(del_reader)) {
            BitVector *deldocs = (BitVector*)VA_Fetch(ivars->bit_vecs, tick);
            deletions = (Matcher*)BitVecMatcher_new(deldocs);
        }
    }
    else { // Sanity check.
        THROW(ERR, "Couldn't find SegReader %o", seg_reader);
    }

    return deletions;
}
Пример #4
0
PolyReader*
PolyReader_init(PolyReader *self, Schema *schema, Folder *folder,
                Snapshot *snapshot, IndexManager *manager,
                VArray *sub_readers) {
    PolyReaderIVARS *const ivars = PolyReader_IVARS(self);
    ivars->doc_max    = 0;
    ivars->del_count  = 0;

    if (sub_readers) {
        uint32_t num_segs = VA_Get_Size(sub_readers);
        VArray *segments = VA_new(num_segs);
        for (uint32_t i = 0; i < num_segs; i++) {
            SegReader *seg_reader
                = (SegReader*)CERTIFY(VA_Fetch(sub_readers, i), SEGREADER);
            VA_Push(segments, INCREF(SegReader_Get_Segment(seg_reader)));
        }
        IxReader_init((IndexReader*)self, schema, folder, snapshot,
                      segments, -1, manager);
        DECREF(segments);
        S_init_sub_readers(self, sub_readers);
    }
    else {
        IxReader_init((IndexReader*)self, schema, folder, snapshot,
                      NULL, -1, manager);
        ivars->sub_readers = VA_new(0);
        ivars->offsets = I32Arr_new_steal(NULL, 0);
    }

    return self;
}
Пример #5
0
static String*
S_del_filename(DefaultDeletionsWriter *self, SegReader *target_reader) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Segment *target_seg = SegReader_Get_Segment(target_reader);
    return Str_newf("%o/deletions-%o.bv", Seg_Get_Name(ivars->segment),
                    Seg_Get_Name(target_seg));
}
Пример #6
0
void
DefDelWriter_Merge_Segment_IMP(DefaultDeletionsWriter *self,
                               SegReader *reader, I32Array *doc_map) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    UNUSED_VAR(doc_map);
    Segment *segment = SegReader_Get_Segment(reader);
    Hash *del_meta = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "deletions", 9);

    if (del_meta) {
        Vector *seg_readers = ivars->seg_readers;
        Hash   *files = (Hash*)Hash_Fetch_Utf8(del_meta, "files", 5);
        if (files) {
            HashIterator *iter = HashIter_new(files);
            while (HashIter_Next(iter)) {
                String *seg       = HashIter_Get_Key(iter);
                Hash   *mini_meta = (Hash*)HashIter_Get_Value(iter);

                /* Find the segment the deletions from the SegReader
                 * we're adding correspond to.  If it's gone, we don't
                 * need to worry about losing deletions files that point
                 * at it. */
                for (size_t i = 0, max = Vec_Get_Size(seg_readers); i < max; i++) {
                    SegReader *candidate
                        = (SegReader*)Vec_Fetch(seg_readers, i);
                    String *candidate_name
                        = Seg_Get_Name(SegReader_Get_Segment(candidate));

                    if (Str_Equals(seg, (Obj*)candidate_name)) {
                        /* If the count hasn't changed, we're about to
                         * merge away the most recent deletions file
                         * pointing at this target segment -- so force a
                         * new file to be written out. */
                        int32_t count = (int32_t)Json_obj_to_i64(Hash_Fetch_Utf8(mini_meta, "count", 5));
                        DeletionsReader *del_reader
                            = (DeletionsReader*)SegReader_Obtain(
                                  candidate, Class_Get_Name(DELETIONSREADER));
                        if (count == DelReader_Del_Count(del_reader)) {
                            ivars->updated[i] = true;
                        }
                        break;
                    }
                }
            }
            DECREF(iter);
        }
    }
}
Пример #7
0
void
DefDelWriter_merge_segment(DefaultDeletionsWriter *self, SegReader *reader,
                           I32Array *doc_map) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    UNUSED_VAR(doc_map);
    Segment *segment = SegReader_Get_Segment(reader);
    Hash *del_meta = (Hash*)Seg_Fetch_Metadata_Str(segment, "deletions", 9);

    if (del_meta) {
        VArray *seg_readers = ivars->seg_readers;
        Hash   *files = (Hash*)Hash_Fetch_Str(del_meta, "files", 5);
        if (files) {
            CharBuf *seg;
            Hash *mini_meta;
            Hash_Iterate(files);
            while (Hash_Next(files, (Obj**)&seg, (Obj**)&mini_meta)) {

                /* Find the segment the deletions from the SegReader
                 * we're adding correspond to.  If it's gone, we don't
                 * need to worry about losing deletions files that point
                 * at it. */
                for (uint32_t i = 0, max = VA_Get_Size(seg_readers); i < max; i++) {
                    SegReader *candidate
                        = (SegReader*)VA_Fetch(seg_readers, i);
                    CharBuf *candidate_name
                        = Seg_Get_Name(SegReader_Get_Segment(candidate));

                    if (CB_Equals(seg, (Obj*)candidate_name)) {
                        /* If the count hasn't changed, we're about to
                         * merge away the most recent deletions file
                         * pointing at this target segment -- so force a
                         * new file to be written out. */
                        int32_t count = (int32_t)Obj_To_I64(Hash_Fetch_Str(mini_meta, "count", 5));
                        DeletionsReader *del_reader
                            = (DeletionsReader*)SegReader_Obtain(
                                  candidate, VTable_Get_Name(DELETIONSREADER));
                        if (count == DelReader_Del_Count(del_reader)) {
                            ivars->updated[i] = true;
                        }
                        break;
                    }
                }
            }
        }
    }
}
Пример #8
0
void
HLWriter_delete_segment(HighlightWriter *self, SegReader *reader)
{
    CharBuf  *merged_seg_name = Seg_Get_Name(SegReader_Get_Segment(reader));
    Snapshot *snapshot = HLWriter_Get_Snapshot(self);
    CharBuf  *ix_file  = CB_newf("%o/highlight.ix", merged_seg_name);
    CharBuf  *dat_file = CB_newf("%o/highlight.dat", merged_seg_name);
    Snapshot_Delete_Entry(snapshot, ix_file);
    Snapshot_Delete_Entry(snapshot, dat_file);
    DECREF(ix_file);
    DECREF(dat_file);
}
Пример #9
0
void
SegWriter_Delete_Segment_IMP(SegWriter *self, SegReader *reader) {
    SegWriterIVARS *const ivars = SegWriter_IVARS(self);
    Snapshot *snapshot = SegWriter_Get_Snapshot(self);
    String   *seg_name = Seg_Get_Name(SegReader_Get_Segment(reader));

    // Have all the sub-writers delete the segment.
    for (size_t i = 0, max = Vec_Get_Size(ivars->writers); i < max; i++) {
        DataWriter *writer = (DataWriter*)Vec_Fetch(ivars->writers, i);
        DataWriter_Delete_Segment(writer, reader);
    }
    DelWriter_Delete_Segment(ivars->del_writer, reader);

    // Remove seg directory from snapshot.
    Snapshot_Delete_Entry(snapshot, seg_name);
}
Пример #10
0
void
LexWriter_delete_segment(LexiconWriter *self, SegReader *reader)
{
    Snapshot *snapshot = LexWriter_Get_Snapshot(self);
    CharBuf  *merged_seg_name = Seg_Get_Name(SegReader_Get_Segment(reader));
    CharBuf  *pattern  = CB_newf("%o/lexicon", merged_seg_name);
    VArray   *files = Snapshot_List(snapshot);
    VArray   *my_old_files = VA_Grep(files, S_my_file, pattern);
    u32_t i, max;

    for (i = 0, max = VA_Get_Size(my_old_files); i < max; i++) {
        CharBuf *entry = (CharBuf*)VA_Fetch(my_old_files, i);
        Snapshot_Delete_Entry(snapshot, entry);
    }
    DECREF(my_old_files);
    DECREF(files);
    DECREF(pattern);
}
Пример #11
0
void
SegWriter_Merge_Segment_IMP(SegWriter *self, SegReader *reader,
                            I32Array *doc_map) {
    SegWriterIVARS *const ivars = SegWriter_IVARS(self);
    Snapshot *snapshot = SegWriter_Get_Snapshot(self);
    String   *seg_name = Seg_Get_Name(SegReader_Get_Segment(reader));

    // Have all the sub-writers merge the segment.
    for (uint32_t i = 0, max = Vec_Get_Size(ivars->writers); i < max; i++) {
        DataWriter *writer = (DataWriter*)Vec_Fetch(ivars->writers, i);
        DataWriter_Merge_Segment(writer, reader, doc_map);
    }
    DelWriter_Merge_Segment(ivars->del_writer, reader, doc_map);

    // Remove seg directory from snapshot.
    Snapshot_Delete_Entry(snapshot, seg_name);

    // Adust the document id.
    S_adjust_doc_id(self, reader, doc_map);
}
Пример #12
0
SegReader*
SegReader_init(SegReader *self, Schema *schema, Folder *folder,
               Snapshot *snapshot, VArray *segments, i32_t seg_tick)
{
    CharBuf *mess;
    IxReader_init((IndexReader*)self, schema, folder, snapshot, segments,
        seg_tick, NULL);
    self->doc_max    = Seg_Get_Count(SegReader_Get_Segment(self));
    mess = SegReader_Try_Init_Components(self);
    if (mess) {
        /* An error occurred, so clean up self and throw an exception. */
        DECREF(self);
        Err_throw_mess(mess);
    }
    {
        DeletionsReader *del_reader = (DeletionsReader*)Hash_Fetch(
            self->components, DELETIONSREADER.name);
        self->del_count = del_reader ? DelReader_Del_Count(del_reader) : 0;
    }
    return self;
}
Пример #13
0
Hash*
DefDelWriter_metadata(DefaultDeletionsWriter *self) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Hash    *const metadata = DataWriter_metadata((DataWriter*)self);
    Hash    *const files    = Hash_new(0);

    for (uint32_t i = 0, max = VA_Get_Size(ivars->seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(ivars->seg_readers, i);
        if (ivars->updated[i]) {
            BitVector *deldocs   = (BitVector*)VA_Fetch(ivars->bit_vecs, i);
            Segment   *segment   = SegReader_Get_Segment(seg_reader);
            Hash      *mini_meta = Hash_new(2);
            Hash_Store_Str(mini_meta, "count", 5,
                           (Obj*)CB_newf("%u32", (uint32_t)BitVec_Count(deldocs)));
            Hash_Store_Str(mini_meta, "filename", 8,
                           (Obj*)S_del_filename(self, seg_reader));
            Hash_Store(files, (Obj*)Seg_Get_Name(segment), (Obj*)mini_meta);
        }
    }
    Hash_Store_Str(metadata, "files", 5, (Obj*)files);

    return metadata;
}