static void
S_lazy_init_sorted_ids(SortFieldWriter *self) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    if (ivars->sorted_ids) { return; }

    // Counting sort. Could be optimized by working directly on the
    // ordinal arrays.

    SortCache *sort_cache      = ivars->sort_cache;
    int32_t    run_cardinality = ivars->run_cardinality;
    int32_t    run_max         = ivars->run_max;

    // Count.
    int32_t *counts = (int32_t*)CALLOCATE(run_cardinality, sizeof(int32_t));
    for (int32_t doc_id = 0; doc_id <= run_max; ++doc_id) {
        int32_t ord = SortCache_Ordinal(sort_cache, doc_id);
        ++counts[ord];
    }

    // Compute partial sums.
    int32_t sum = 0;
    for (int32_t ord = 0; ord < run_cardinality; ++ord) {
        int32_t count = counts[ord];
        counts[ord] = sum;
        sum += count;
    }

    // Distribute.
    int32_t *sorted_ids = (int32_t*)MALLOCATE((run_max + 1) * sizeof(int32_t));
    for (int32_t doc_id = 0; doc_id <= run_max; ++doc_id) {
        int32_t ord = SortCache_Ordinal(sort_cache, doc_id);
        int32_t pos = counts[ord]++;
        sorted_ids[pos] = doc_id;
    }

    ivars->sorted_ids = sorted_ids;
    FREEMEM(counts);
}
uint32_t
SortFieldWriter_Refill_IMP(SortFieldWriter *self) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    if (!ivars->sort_cache) { return 0; }

    // Sanity check, then reset the buffer and prepare to start loading items.
    uint32_t buf_count = SortFieldWriter_Buffer_Count(self);
    if (buf_count) {
        THROW(ERR, "Refill called but buffer contains %u32 items",
              buf_count);
    }
    SortFieldWriter_Clear_Buffer(self);
    Counter_Reset(ivars->counter);
    S_lazy_init_sorted_ids(self);

    const int32_t    null_ord   = ivars->null_ord;
    I32Array *const  doc_map    = ivars->doc_map;
    SortCache *const sort_cache = ivars->sort_cache;

    uint32_t count = 0;
    while (ivars->run_tick <= ivars->run_max
           && Counter_Get_Value(ivars->counter) < ivars->mem_thresh
          ) {
        int32_t raw_doc_id = ivars->sorted_ids[ivars->run_tick];
        int32_t ord = SortCache_Ordinal(sort_cache, raw_doc_id);
        if (ord != null_ord) {
            int32_t remapped = doc_map
                               ? I32Arr_Get(doc_map, raw_doc_id)
                               : raw_doc_id;
            if (remapped) {
                Obj *val = SortCache_Value(sort_cache, ord);
                SortFieldWriter_Add(self, remapped, val);
                count++;
                DECREF(val);
            }
        }
        ivars->run_tick++;
    }

    if (ivars->run_tick > ivars->run_max) {
        DECREF(ivars->sort_cache);
        ivars->sort_cache = NULL;
        FREEMEM(ivars->sorted_ids);
        ivars->sorted_ids = NULL;
    }

    return count;
}
Example #3
0
int32_t
RangeMatcher_next(RangeMatcher* self) {
    while (1) {
        if (++self->doc_id > self->doc_max) {
            self->doc_id--;
            return 0;
        }
        else {
            // Check if ord for this document is within the specied range.
            // TODO: Unroll? i.e. use SortCache_Get_Ords at constructor time
            // and save ourselves some method call overhead.
            const int32_t ord
                = SortCache_Ordinal(self->sort_cache, self->doc_id);
            if (ord >= self->lower_bound && ord <= self->upper_bound) {
                break;
            }
        }
    }
    return self->doc_id;
}
Example #4
0
void
SortColl_collect(SortCollector *self, int32_t doc_id) {
    SortCollectorIVARS *const ivars = SortColl_IVARS(self);

    // Add to the total number of hits.
    ivars->total_hits++;

    // Collect this hit if it's competitive.
    if (SI_competitive(ivars, doc_id)) {
        MatchDoc *const match_doc = ivars->bumped;
        MatchDocIVARS *const match_doc_ivars = MatchDoc_IVARS(match_doc);
        match_doc_ivars->doc_id = doc_id + ivars->base;

        if (ivars->need_score && match_doc_ivars->score == F32_NEGINF) {
            match_doc_ivars->score = Matcher_Score(ivars->matcher);
        }

        // Fetch values so that cross-segment sorting can work.
        if (ivars->need_values) {
            VArray *values = match_doc_ivars->values;

            for (uint32_t i = 0, max = ivars->num_rules; i < max; i++) {
                SortCache *cache   = ivars->sort_caches[i];
                Obj       *old_val = (Obj*)VA_Delete(values, i);
                if (cache) {
                    int32_t ord = SortCache_Ordinal(cache, doc_id);
                    Obj *blank = old_val
                                 ? old_val
                                 : SortCache_Make_Blank(cache);
                    Obj *val = SortCache_Value(cache, ord, blank);
                    if (val) { VA_Store(values, i, (Obj*)val); }
                    else     { DECREF(blank); }
                }
            }
        }

        // Insert the new MatchDoc.
        ivars->bumped = (MatchDoc*)HitQ_Jostle(ivars->hit_q, (Obj*)match_doc);

        if (ivars->bumped) {
            if (ivars->bumped == match_doc) {
                /* The queue is full, and we have established a threshold for
                 * this segment as to what sort of document is definitely not
                 * acceptable.  Turn off AUTO_ACCEPT and start actually
                 * testing whether hits are competitive. */
                ivars->bubble_score  = match_doc_ivars->score;
                ivars->bubble_doc    = doc_id;
                ivars->actions       = ivars->derived_actions;
            }

            // Recycle.
            MatchDoc_IVARS(ivars->bumped)->score = ivars->need_score
                                                   ? F32_NEGINF
                                                   : F32_NAN;
        }
        else {
            // The queue isn't full yet, so create a fresh MatchDoc.
            VArray *values = ivars->need_values
                             ? VA_new(ivars->num_rules)
                             : NULL;
            float fake_score = ivars->need_score ? F32_NEGINF : F32_NAN;
            ivars->bumped = MatchDoc_new(INT32_MAX, fake_score, values);
            DECREF(values);
        }

    }
}