/* Populate ivars->sorted_ids on first use; a no-op if it is already set.
 *
 * Produces an array of the doc ids 0..run_max ordered by the ordinal that
 * the sort cache reports for each of them.  The ordering is built with a
 * counting sort, so doc ids sharing an ordinal stay in ascending order.
 * Could be optimized by working directly on the ordinal arrays.
 */
static void
S_lazy_init_sorted_ids(SortFieldWriter *self) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    if (ivars->sorted_ids) {
        return;
    }

    SortCache *const cache    = ivars->sort_cache;
    const int32_t    num_ords = ivars->run_cardinality;
    const int32_t    last_doc = ivars->run_max;

    // Pass 1: histogram of how many docs map to each ordinal.
    int32_t *offsets = (int32_t*)CALLOCATE(num_ords, sizeof(int32_t));
    for (int32_t doc_id = 0; doc_id <= last_doc; doc_id++) {
        const int32_t ord = SortCache_Ordinal(cache, doc_id);
        offsets[ord]++;
    }

    // Pass 2: turn the histogram into exclusive prefix sums, so that
    // offsets[ord] is the first output slot for that ordinal.
    int32_t running_total = 0;
    for (int32_t ord = 0; ord < num_ords; ord++) {
        const int32_t bucket_size = offsets[ord];
        offsets[ord] = running_total;
        running_total += bucket_size;
    }

    // Pass 3: drop each doc id into the next free slot of its bucket.
    int32_t *ids = (int32_t*)MALLOCATE((last_doc + 1) * sizeof(int32_t));
    for (int32_t doc_id = 0; doc_id <= last_doc; doc_id++) {
        const int32_t ord  = SortCache_Ordinal(cache, doc_id);
        const int32_t slot = offsets[ord];
        offsets[ord] = slot + 1;
        ids[slot] = doc_id;
    }

    FREEMEM(offsets);
    ivars->sorted_ids = ids;
}
/* Load the next batch of (doc id, value) items from the sort cache into
 * the writer's buffer.
 *
 * Returns the number of items added during this call, or 0 immediately if
 * there is no sort cache.  Throws if the buffer still holds items on
 * entry.  Loading stops when run_tick passes run_max or when the counter
 * reaches mem_thresh; once run_tick has passed run_max, the sort cache and
 * sorted_ids are released.
 */
uint32_t
SortFieldWriter_Refill_IMP(SortFieldWriter *self) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    if (!ivars->sort_cache) {
        return 0;
    }

    // The buffer must have been fully drained before a refill.
    const uint32_t leftover = SortFieldWriter_Buffer_Count(self);
    if (leftover) {
        THROW(ERR, "Refill called but buffer contains %u32 items", leftover);
    }

    // Start from a clean slate.
    SortFieldWriter_Clear_Buffer(self);
    Counter_Reset(ivars->counter);
    S_lazy_init_sorted_ids(self);

    SortCache *const cache    = ivars->sort_cache;
    I32Array *const  doc_map  = ivars->doc_map;
    const int32_t    null_ord = ivars->null_ord;

    uint32_t num_added = 0;
    for (;
         ivars->run_tick <= ivars->run_max
         && Counter_Get_Value(ivars->counter) < ivars->mem_thresh;
         ivars->run_tick++
        ) {
        const int32_t raw_doc_id = ivars->sorted_ids[ivars->run_tick];
        const int32_t ord        = SortCache_Ordinal(cache, raw_doc_id);

        // Docs whose ordinal is the null ordinal contribute nothing.
        if (ord == null_ord) {
            continue;
        }

        // Translate the doc id through the doc map when one is present.
        int32_t remapped = raw_doc_id;
        if (doc_map) {
            remapped = I32Arr_Get(doc_map, raw_doc_id);
        }

        // A remapped id of 0 is skipped.
        if (!remapped) {
            continue;
        }

        Obj *val = SortCache_Value(cache, ord);
        SortFieldWriter_Add(self, remapped, val);
        num_added++;
        DECREF(val);
    }

    // Every doc in the run has been visited: release per-run resources.
    if (ivars->run_tick > ivars->run_max) {
        DECREF(ivars->sort_cache);
        ivars->sort_cache = NULL;
        FREEMEM(ivars->sorted_ids);
        ivars->sorted_ids = NULL;
    }

    return num_added;
}
/* Advance to the next document whose sort-cache ordinal lies within
 * [lower_bound, upper_bound] (inclusive on both ends).
 *
 * Returns the matching doc id, or 0 once doc_id would pass doc_max.  In
 * the exhausted case doc_id is restored to its previous value before
 * returning.
 */
int32_t
RangeMatcher_next(RangeMatcher* self) {
    for (;;) {
        self->doc_id++;
        if (self->doc_id > self->doc_max) {
            // Out of documents: undo the increment and report exhaustion.
            self->doc_id--;
            return 0;
        }

        // Check whether the ord for this document is within the specified
        // range.
        // TODO: Unroll? i.e. use SortCache_Get_Ords at constructor time
        // and save ourselves some method call overhead.
        const int32_t ord
            = SortCache_Ordinal(self->sort_cache, self->doc_id);
        if (self->lower_bound <= ord && ord <= self->upper_bound) {
            return self->doc_id;
        }
    }
}
/* Record a hit for `doc_id` and, if SI_competitive() accepts it, push it
 * into the hit queue.
 *
 * total_hits is incremented for every call.  A competitive hit is written
 * into the spare MatchDoc held in ivars->bumped (doc id offset by
 * ivars->base, score and per-rule sort values filled in as required) and
 * then offered to the queue via HitQ_Jostle().  Whatever the queue hands
 * back becomes the next spare; if it hands back nothing, a fresh MatchDoc
 * is allocated to serve as the spare.
 */
void
SortColl_collect(SortCollector *self, int32_t doc_id) {
    SortCollectorIVARS *const ivars = SortColl_IVARS(self);

    // Add to the total number of hits, competitive or not.
    ivars->total_hits++;

    // Only competitive hits are collected.
    if (!SI_competitive(ivars, doc_id)) {
        return;
    }

    MatchDoc *const      candidate       = ivars->bumped;
    MatchDocIVARS *const candidate_ivars = MatchDoc_IVARS(candidate);
    candidate_ivars->doc_id = doc_id + ivars->base;

    // F32_NEGINF is the placeholder given to spare MatchDocs when scores
    // are needed; replace it with the matcher's score.
    if (ivars->need_score && candidate_ivars->score == F32_NEGINF) {
        candidate_ivars->score = Matcher_Score(ivars->matcher);
    }

    // Fetch values so that cross-segment sorting can work.
    if (ivars->need_values) {
        VArray *const  values    = candidate_ivars->values;
        const uint32_t num_rules = ivars->num_rules;

        for (uint32_t i = 0; i < num_rules; i++) {
            SortCache *const cache    = ivars->sort_caches[i];
            Obj *const       previous = (Obj*)VA_Delete(values, i);
            if (!cache) {
                continue;
            }

            const int32_t ord = SortCache_Ordinal(cache, doc_id);

            // Reuse the value object left in this slot if there is one;
            // otherwise ask the cache for a blank.
            Obj *blank = previous;
            if (!blank) {
                blank = SortCache_Make_Blank(cache);
            }

            Obj *val = SortCache_Value(cache, ord, blank);
            if (val) {
                VA_Store(values, i, (Obj*)val);
            }
            else {
                DECREF(blank);
            }
        }
    }

    // Insert the new MatchDoc; keep whatever the queue gives back.
    ivars->bumped = (MatchDoc*)HitQ_Jostle(ivars->hit_q, (Obj*)candidate);

    if (!ivars->bumped) {
        // The queue isn't full yet, so create a fresh MatchDoc.
        VArray *fresh_values = ivars->need_values
                               ? VA_new(ivars->num_rules)
                               : NULL;
        float placeholder = ivars->need_score ? F32_NEGINF : F32_NAN;
        ivars->bumped = MatchDoc_new(INT32_MAX, placeholder, fresh_values);
        DECREF(fresh_values);
        return;
    }

    if (ivars->bumped == candidate) {
        /* The queue is full and rejected the doc we just offered, which
         * establishes a threshold for this segment as to what sort of
         * document is definitely not acceptable.  Remember it and switch
         * to the derived actions so that hits are actually tested for
         * competitiveness from now on (i.e. AUTO_ACCEPT is turned off).
         */
        ivars->bubble_score = candidate_ivars->score;
        ivars->bubble_doc   = doc_id;
        ivars->actions      = ivars->derived_actions;
    }

    // Recycle: reset the spare's score to the placeholder.
    MatchDoc_IVARS(ivars->bumped)->score
        = ivars->need_score ? F32_NEGINF : F32_NAN;
}