示例#1
0
VArray*
IxManager_recycle(IndexManager *self, PolyReader *reader, 
                  DeletionsWriter *del_writer, int64_t cutoff, bool_t optimize)
{
    VArray *seg_readers = PolyReader_Get_Seg_Readers(reader);
    VArray *candidates  = VA_Gather(seg_readers, S_check_cutoff, &cutoff);
    VArray *recyclables = VA_new(VA_Get_Size(candidates));
    const uint32_t num_candidates = VA_Get_Size(candidates);

    if (optimize) { 
        DECREF(recyclables);
        return candidates; 
    }

    // Sort by ascending size in docs, choose sparsely populated segments.
    VA_Sort(candidates, S_compare_doc_count, NULL);
    int32_t *counts = (int32_t*)MALLOCATE(num_candidates * sizeof(int32_t));
    for (uint32_t i = 0; i < num_candidates; i++) {
        SegReader *seg_reader = (SegReader*)CERTIFY(
            VA_Fetch(candidates, i), SEGREADER);
        counts[i] = SegReader_Doc_Count(seg_reader);
    }
    I32Array *doc_counts = I32Arr_new_steal(counts, num_candidates);
    uint32_t threshold = IxManager_Choose_Sparse(self, doc_counts);
    DECREF(doc_counts);

    // Move SegReaders to be recycled.
    for (uint32_t i = 0; i < threshold; i++) {
        VA_Store(recyclables, i, VA_Delete(candidates, i));
    }

    // Find segments where at least 10% of all docs have been deleted. 
    for (uint32_t i = threshold; i < num_candidates; i++) {
        SegReader *seg_reader = (SegReader*)VA_Delete(candidates, i);
        CharBuf   *seg_name   = SegReader_Get_Seg_Name(seg_reader);
        double doc_max = SegReader_Doc_Max(seg_reader);
        double num_deletions = DelWriter_Seg_Del_Count(del_writer, seg_name);
        double del_proportion = num_deletions / doc_max;
        if (del_proportion >= 0.1) {
            VA_Push(recyclables, (Obj*)seg_reader);
        }
        else {
            DECREF(seg_reader);
        }
    }

    DECREF(candidates);
    return recyclables;
}
示例#2
0
void
SortColl_collect(SortCollector *self, int32_t doc_id) {
    SortCollectorIVARS *const ivars = SortColl_IVARS(self);

    // Add to the total number of hits.
    ivars->total_hits++;

    // Collect this hit if it's competitive.
    if (SI_competitive(ivars, doc_id)) {
        MatchDoc *const match_doc = ivars->bumped;
        MatchDocIVARS *const match_doc_ivars = MatchDoc_IVARS(match_doc);
        match_doc_ivars->doc_id = doc_id + ivars->base;

        if (ivars->need_score && match_doc_ivars->score == F32_NEGINF) {
            match_doc_ivars->score = Matcher_Score(ivars->matcher);
        }

        // Fetch values so that cross-segment sorting can work.
        if (ivars->need_values) {
            VArray *values = match_doc_ivars->values;

            for (uint32_t i = 0, max = ivars->num_rules; i < max; i++) {
                SortCache *cache   = ivars->sort_caches[i];
                Obj       *old_val = (Obj*)VA_Delete(values, i);
                if (cache) {
                    int32_t ord = SortCache_Ordinal(cache, doc_id);
                    Obj *blank = old_val
                                 ? old_val
                                 : SortCache_Make_Blank(cache);
                    Obj *val = SortCache_Value(cache, ord, blank);
                    if (val) { VA_Store(values, i, (Obj*)val); }
                    else     { DECREF(blank); }
                }
            }
        }

        // Insert the new MatchDoc.
        ivars->bumped = (MatchDoc*)HitQ_Jostle(ivars->hit_q, (Obj*)match_doc);

        if (ivars->bumped) {
            if (ivars->bumped == match_doc) {
                /* The queue is full, and we have established a threshold for
                 * this segment as to what sort of document is definitely not
                 * acceptable.  Turn off AUTO_ACCEPT and start actually
                 * testing whether hits are competitive. */
                ivars->bubble_score  = match_doc_ivars->score;
                ivars->bubble_doc    = doc_id;
                ivars->actions       = ivars->derived_actions;
            }

            // Recycle.
            MatchDoc_IVARS(ivars->bumped)->score = ivars->need_score
                                                   ? F32_NEGINF
                                                   : F32_NAN;
        }
        else {
            // The queue isn't full yet, so create a fresh MatchDoc.
            VArray *values = ivars->need_values
                             ? VA_new(ivars->num_rules)
                             : NULL;
            float fake_score = ivars->need_score ? F32_NEGINF : F32_NAN;
            ivars->bumped = MatchDoc_new(INT32_MAX, fake_score, values);
            DECREF(values);
        }

    }
}
示例#3
0
文件: SortWriter.c 项目: theory/lucy
void
SortWriter_finish(SortWriter *self) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    VArray *const field_writers = ivars->field_writers;

    // If we have no data, bail out.
    if (!ivars->temp_ord_out) { return; }

    // If we've either flushed or added segments, flush everything so that any
    // one field can use the entire margin up to mem_thresh.
    if (ivars->flush_at_finish) {
        for (uint32_t i = 1, max = VA_Get_Size(field_writers); i < max; i++) {
            SortFieldWriter *field_writer
                = (SortFieldWriter*)VA_Fetch(field_writers, i);
            if (field_writer) {
                SortFieldWriter_Flush(field_writer);
            }
        }
    }

    // Close down temp streams.
    OutStream_Close(ivars->temp_ord_out);
    OutStream_Close(ivars->temp_ix_out);
    OutStream_Close(ivars->temp_dat_out);

    for (uint32_t i = 1, max = VA_Get_Size(field_writers); i < max; i++) {
        SortFieldWriter *field_writer
            = (SortFieldWriter*)VA_Delete(field_writers, i);
        if (field_writer) {
            CharBuf *field = Seg_Field_Name(ivars->segment, i);
            SortFieldWriter_Flip(field_writer);
            int32_t count = SortFieldWriter_Finish(field_writer);
            Hash_Store(ivars->counts, (Obj*)field,
                       (Obj*)CB_newf("%i32", count));
            int32_t null_ord = SortFieldWriter_Get_Null_Ord(field_writer);
            if (null_ord != -1) {
                Hash_Store(ivars->null_ords, (Obj*)field,
                           (Obj*)CB_newf("%i32", null_ord));
            }
            int32_t ord_width = SortFieldWriter_Get_Ord_Width(field_writer);
            Hash_Store(ivars->ord_widths, (Obj*)field,
                       (Obj*)CB_newf("%i32", ord_width));
        }

        DECREF(field_writer);
    }
    VA_Clear(field_writers);

    // Store metadata.
    Seg_Store_Metadata_Str(ivars->segment, "sort", 4,
                           (Obj*)SortWriter_Metadata(self));

    // Clean up.
    Folder  *folder   = ivars->folder;
    CharBuf *seg_name = Seg_Get_Name(ivars->segment);
    CharBuf *path     = CB_newf("%o/sort_ord_temp", seg_name);
    Folder_Delete(folder, path);
    CB_setf(path, "%o/sort_ix_temp", seg_name);
    Folder_Delete(folder, path);
    CB_setf(path, "%o/sort_dat_temp", seg_name);
    Folder_Delete(folder, path);
    DECREF(path);
}