/* Ordering predicate for the hit queue.
 *
 * Walks the queue's `actions` array and applies each comparison rule in turn
 * to the two MatchDocs.  The first rule that tells them apart decides the
 * answer.
 *
 * Returns true when `obj_a` is the less preferred of the two, false when it
 * is the more preferred one or when every rule ends in a tie.  Throws on an
 * action code that this switch does not handle.
 */
bool
HitQ_Less_Than_IMP(HitQueue *self, Obj *obj_a, Obj *obj_b) {
    HitQueueIVARS *const ivars = HitQ_IVARS(self);
    MatchDocIVARS *const lhs   = MatchDoc_IVARS((MatchDoc*)obj_a);
    MatchDocIVARS *const rhs   = MatchDoc_IVARS((MatchDoc*)obj_b);
    uint8_t *const rules       = ivars->actions;
    uint32_t tick              = 0;

    // The first rule is always examined; num_actions is only consulted
    // after a rule has come out tied.
    for (;;) {
        switch (rules[tick] & ACTIONS_MASK) {
            case COMPARE_BY_SCORE:
                // High scores are preferred, so the lower score is "less".
                if (lhs->score > rhs->score) { return false; }
                if (lhs->score < rhs->score) { return true; }
                break;

            case COMPARE_BY_SCORE_REV:
                // Reversed: the higher score is "less".
                if (lhs->score > rhs->score) { return true; }
                if (lhs->score < rhs->score) { return false; }
                break;

            case COMPARE_BY_DOC_ID:
                // Low doc ids are preferred, so the higher id is "less".
                if (lhs->doc_id > rhs->doc_id) { return true; }
                if (lhs->doc_id < rhs->doc_id) { return false; }
                break;

            case COMPARE_BY_DOC_ID_REV:
                // Reversed: the lower doc id is "less".
                if (lhs->doc_id > rhs->doc_id) { return false; }
                if (lhs->doc_id < rhs->doc_id) { return true; }
                break;

            case COMPARE_BY_VALUE: {
                    // A positive result means `a` is "less".
                    const int32_t verdict
                        = SI_compare_by_value(ivars, tick, lhs, rhs);
                    if (verdict > 0) { return true; }
                    if (verdict < 0) { return false; }
                }
                break;

            case COMPARE_BY_VALUE_REV: {
                    // Same test with the operands swapped.
                    const int32_t verdict
                        = SI_compare_by_value(ivars, tick, rhs, lhs);
                    if (verdict > 0) { return true; }
                    if (verdict < 0) { return false; }
                }
                break;

            default:
                THROW(ERR, "Unexpected action %u8", rules[tick]);
        }

        // This rule tied: move on, or give up once the rules run out.
        if (++tick >= ivars->num_actions) {
            return false;
        }
    }
}
/* Point the collector at a new segment.
 *
 * Resets the recycled `bumped` MatchDoc and the bubble thresholds, switches
 * the active action list back to `auto_actions`, looks up a sort cache and
 * derives an action for every sort rule (only when values are needed and the
 * segment supplies a SortReader), records the segment's doc max, and finally
 * forwards the reader to Coll_set_reader().
 */
void
SortColl_set_reader(SortCollector *self, SegReader *reader) {
    SortCollectorIVARS *const ivars = SortColl_IVARS(self);
    SortReader *sort_reader
        = (SortReader*)SegReader_Fetch(reader, VTable_Get_Name(SORTREADER));
    MatchDocIVARS *const recycled = MatchDoc_IVARS(ivars->bumped);

    // Forget the previous segment's threshold and fall back to the
    // automatic actions.
    recycled->doc_id  = INT32_MAX;
    ivars->bubble_doc = INT32_MAX;
    if (ivars->need_score) {
        recycled->score     = F32_NEGINF;
        ivars->bubble_score = F32_NEGINF;
    }
    else {
        recycled->score     = F32_NAN;
        ivars->bubble_score = F32_NAN;
    }
    ivars->actions = ivars->auto_actions;

    // Per-rule setup: sort cache, derived action and ord array for this
    // segment.
    if (ivars->need_values && sort_reader) {
        const uint32_t rule_count = ivars->num_rules;
        for (uint32_t tick = 0; tick < rule_count; tick++) {
            SortRule  *rule  = (SortRule*)VA_Fetch(ivars->rules, tick);
            CharBuf   *field = SortRule_Get_Field(rule);
            SortCache *cache = NULL;

            // Rules without a field get no cache.
            if (field) {
                cache = SortReader_Fetch_Sort_Cache(sort_reader, field);
            }

            ivars->sort_caches[tick]     = cache;
            ivars->derived_actions[tick] = S_derive_action(rule, cache);
            ivars->ord_arrays[tick]
                = cache ? SortCache_Get_Ords(cache) : NULL;
        }
    }

    ivars->seg_doc_max = reader ? SegReader_Doc_Max(reader) : 0;
    Coll_set_reader((Collector*)self, reader);
}
/* Type-checking wrapper around the parent class's Jostle.
 *
 * Verifies that `element` is a MATCHDOC and, when the queue needs sort
 * values, that its `values` member is a VARRAY.  The element is then passed
 * to the superclass implementation, whose return value is returned unchanged.
 */
Obj*
HitQ_Jostle_IMP(HitQueue *self, Obj *element) {
    HitQueueIVARS *const ivars = HitQ_IVARS(self);
    MatchDoc *const candidate  = (MatchDoc*)CERTIFY(element, MATCHDOC);

    if (ivars->need_values) {
        // Only the check matters here; the certified pointer is not used.
        CERTIFY(MatchDoc_IVARS(candidate)->values, VARRAY);
    }

    HitQ_Jostle_t parent_jostle
        = SUPER_METHOD_PTR(HITQUEUE, LUCY_HitQ_Jostle);
    return parent_jostle(self, element);
}
/* Return the next captured hit as a HitDoc, or NULL once the captured hits
 * are used up (there may be more total hits than were captured).
 *
 * The HitDoc is fetched from the searcher on demand and has the MatchDoc's
 * score applied before it is returned.
 */
HitDoc*
Hits_Next_IMP(Hits *self) {
    HitsIVARS *const ivars = Hits_IVARS(self);
    MatchDoc *captured
        = (MatchDoc*)VA_Fetch(ivars->match_docs, ivars->offset);

    // The cursor moves forward whether or not anything was found.
    ivars->offset += 1;

    if (captured == NULL) {
        return NULL;
    }

    // Fetch the document lazily and stamp the score onto it.
    MatchDocIVARS *const captured_ivars = MatchDoc_IVARS(captured);
    HitDoc *hit
        = Searcher_Fetch_Doc(ivars->searcher, captured_ivars->doc_id);
    HitDoc_Set_Score(hit, captured_ivars->score);
    return hit;
}
/* Report whether two floats have exactly the same bit pattern.
 *
 * The bits are read through a union rather than by casting a `float*` to an
 * integer pointer: reading a float object through an integer lvalue breaks
 * C's aliasing rules and is undefined behavior.  Like the cast it replaces,
 * this assumes `float` is 32 bits wide.
 */
static INLINE bool
SI_f32_bits_equal(float a, float b) {
    union { float f; uint32_t bits; } pun_a, pun_b;
    pun_a.f = a;
    pun_b.f = b;
    return pun_a.bits == pun_b.bits;
}

/* Decide whether `doc_id` deserves a place in the hit queue.
 *
 * Walks the currently active `actions` array and applies each action in
 * turn, comparing the candidate against the "bubble" threshold
 * (`bubble_score` / `bubble_doc`).  The first action that is not a tie
 * decides.
 *
 * Returns true if the document is competitive, false if it is not or if
 * every action ties.  Throws on an action code this switch does not handle.
 *
 * Side effect: when a score comparison accepts the document, the score is
 * stored in the `bumped` MatchDoc.
 */
static INLINE bool
SI_competitive(SortCollectorIVARS *ivars, int32_t doc_id) {
    /* Ordinarily, we would cache local copies of more member variables in
     * const automatic variables in order to improve code clarity and provide
     * more hints to the compiler about what variables are actually invariant
     * for the duration of this routine:
     *
     *     const uint32_t num_actions = ivars->num_actions;
     *     const int32_t  bubble_doc  = ivars->bubble_doc;
     *
     * However, our major goal is to return as quickly as possible, and the
     * common case is that we'll have our answer before the first loop iter
     * finishes -- so apart from `actions` itself we don't worry about the
     * cost of performing extra dereferencing on subsequent loop iters.
     *
     * The goal of returning quickly also drives the choice of a "do-while"
     * loop instead of a "for" loop, and the switch statement optimized for
     * compilation to a jump table.
     */
    uint8_t *const actions = ivars->actions;
    uint32_t i = 0;

    // Iterate through our array of actions, returning as quickly as possible.
    do {
        switch (actions[i] & ACTIONS_MASK) {
            case AUTO_ACCEPT:
                return true;
            case AUTO_REJECT:
                return false;
            case AUTO_TIE:
                break;
            case COMPARE_BY_SCORE: {
                    float score = Matcher_Score(ivars->matcher);
                    // Bit-identical scores are a tie.
                    if (SI_f32_bits_equal(score, ivars->bubble_score)) {
                        break;
                    }
                    if (score > ivars->bubble_score) {
                        // Keep the score that was just computed.
                        MatchDoc_IVARS(ivars->bumped)->score = score;
                        return true;
                    }
                    else if (score < ivars->bubble_score) {
                        return false;
                    }
                }
                break;
            case COMPARE_BY_SCORE_REV: {
                    float score = Matcher_Score(ivars->matcher);
                    // Bit-identical scores are a tie.
                    if (SI_f32_bits_equal(score, ivars->bubble_score)) {
                        break;
                    }
                    if (score < ivars->bubble_score) {
                        // Keep the score that was just computed.
                        MatchDoc_IVARS(ivars->bumped)->score = score;
                        return true;
                    }
                    else if (score > ivars->bubble_score) {
                        return false;
                    }
                }
                break;
            case COMPARE_BY_DOC_ID:
                if (doc_id > ivars->bubble_doc)      { return false; }
                else if (doc_id < ivars->bubble_doc) { return true; }
                break;
            case COMPARE_BY_DOC_ID_REV:
                if (doc_id > ivars->bubble_doc)      { return true; }
                else if (doc_id < ivars->bubble_doc) { return false; }
                break;

            /* Ordinal comparisons.  For every width, a negative result
             * accepts the document and a positive result rejects it; the
             * _REV variants simply swap the two operands. */
            case COMPARE_BY_ORD1: {
                    int32_t comparison
                        = SI_compare_by_ord1(
                              ivars, i, SI_validate_doc_id(ivars, doc_id),
                              ivars->bubble_doc);
                    if (comparison < 0)      { return true; }
                    else if (comparison > 0) { return false; }
                }
                break;
            case COMPARE_BY_ORD1_REV: {
                    int32_t comparison
                        = SI_compare_by_ord1(
                              ivars, i, ivars->bubble_doc,
                              SI_validate_doc_id(ivars, doc_id));
                    if (comparison < 0)      { return true; }
                    else if (comparison > 0) { return false; }
                }
                break;
            case COMPARE_BY_ORD2: {
                    int32_t comparison
                        = SI_compare_by_ord2(
                              ivars, i, SI_validate_doc_id(ivars, doc_id),
                              ivars->bubble_doc);
                    if (comparison < 0)      { return true; }
                    else if (comparison > 0) { return false; }
                }
                break;
            case COMPARE_BY_ORD2_REV: {
                    int32_t comparison
                        = SI_compare_by_ord2(
                              ivars, i, ivars->bubble_doc,
                              SI_validate_doc_id(ivars, doc_id));
                    if (comparison < 0)      { return true; }
                    else if (comparison > 0) { return false; }
                }
                break;
            case COMPARE_BY_ORD4: {
                    int32_t comparison
                        = SI_compare_by_ord4(
                              ivars, i, SI_validate_doc_id(ivars, doc_id),
                              ivars->bubble_doc);
                    if (comparison < 0)      { return true; }
                    else if (comparison > 0) { return false; }
                }
                break;
            case COMPARE_BY_ORD4_REV: {
                    int32_t comparison
                        = SI_compare_by_ord4(
                              ivars, i, ivars->bubble_doc,
                              SI_validate_doc_id(ivars, doc_id));
                    if (comparison < 0)      { return true; }
                    else if (comparison > 0) { return false; }
                }
                break;
            case COMPARE_BY_ORD8: {
                    int32_t comparison
                        = SI_compare_by_ord8(
                              ivars, i, SI_validate_doc_id(ivars, doc_id),
                              ivars->bubble_doc);
                    if (comparison < 0)      { return true; }
                    else if (comparison > 0) { return false; }
                }
                break;
            case COMPARE_BY_ORD8_REV: {
                    int32_t comparison
                        = SI_compare_by_ord8(
                              ivars, i, ivars->bubble_doc,
                              SI_validate_doc_id(ivars, doc_id));
                    if (comparison < 0)      { return true; }
                    else if (comparison > 0) { return false; }
                }
                break;
            case COMPARE_BY_ORD16: {
                    int32_t comparison
                        = SI_compare_by_ord16(
                              ivars, i, SI_validate_doc_id(ivars, doc_id),
                              ivars->bubble_doc);
                    if (comparison < 0)      { return true; }
                    else if (comparison > 0) { return false; }
                }
                break;
            case COMPARE_BY_ORD16_REV: {
                    int32_t comparison
                        = SI_compare_by_ord16(
                              ivars, i, ivars->bubble_doc,
                              SI_validate_doc_id(ivars, doc_id));
                    if (comparison < 0)      { return true; }
                    else if (comparison > 0) { return false; }
                }
                break;
            case COMPARE_BY_ORD32: {
                    int32_t comparison
                        = SI_compare_by_ord32(
                              ivars, i, SI_validate_doc_id(ivars, doc_id),
                              ivars->bubble_doc);
                    if (comparison < 0)      { return true; }
                    else if (comparison > 0) { return false; }
                }
                break;
            case COMPARE_BY_ORD32_REV: {
                    int32_t comparison
                        = SI_compare_by_ord32(
                              ivars, i, ivars->bubble_doc,
                              SI_validate_doc_id(ivars, doc_id));
                    if (comparison < 0)      { return true; }
                    else if (comparison > 0) { return false; }
                }
                break;
            case COMPARE_BY_NATIVE_ORD16: {
                    int32_t comparison
                        = SI_compare_by_native_ord16(
                              ivars, i, SI_validate_doc_id(ivars, doc_id),
                              ivars->bubble_doc);
                    if (comparison < 0)      { return true; }
                    else if (comparison > 0) { return false; }
                }
                break;
            case COMPARE_BY_NATIVE_ORD16_REV: {
                    int32_t comparison
                        = SI_compare_by_native_ord16(
                              ivars, i, ivars->bubble_doc,
                              SI_validate_doc_id(ivars, doc_id));
                    if (comparison < 0)      { return true; }
                    else if (comparison > 0) { return false; }
                }
                break;
            case COMPARE_BY_NATIVE_ORD32: {
                    int32_t comparison
                        = SI_compare_by_native_ord32(
                              ivars, i, SI_validate_doc_id(ivars, doc_id),
                              ivars->bubble_doc);
                    if (comparison < 0)      { return true; }
                    else if (comparison > 0) { return false; }
                }
                break;
            case COMPARE_BY_NATIVE_ORD32_REV: {
                    int32_t comparison
                        = SI_compare_by_native_ord32(
                              ivars, i, ivars->bubble_doc,
                              SI_validate_doc_id(ivars, doc_id));
                    if (comparison < 0)      { return true; }
                    else if (comparison > 0) { return false; }
                }
                break;
            default:
                THROW(ERR, "UNEXPECTED action %u8", actions[i]);
        }
    } while (++i < ivars->num_actions);

    // If we've made it this far and we're still tied, reject the doc so that
    // we prefer items already in the queue.  This has the effect of
    // implicitly breaking ties by doc num, since docs are collected in order.
    return false;
}
/* Collect one matching document.
 *
 * Every call bumps `total_hits`.  If SI_competitive() accepts the document,
 * the recycled `bumped` MatchDoc is filled in with the doc id (offset by
 * `base`), the score when scores are needed, and the per-rule sort values
 * when values are needed.  It is then offered to the hit queue via
 * HitQ_Jostle().  Whatever the queue hands back becomes the next `bumped`
 * MatchDoc; if it hands back nothing, a fresh one is allocated.
 */
void
SortColl_collect(SortCollector *self, int32_t doc_id) {
    SortCollectorIVARS *const ivars = SortColl_IVARS(self);

    // Add to the total number of hits.
    ivars->total_hits++;

    // Collect this hit only if it's competitive.
    if (!SI_competitive(ivars, doc_id)) {
        return;
    }

    MatchDoc *const match_doc = ivars->bumped;
    MatchDocIVARS *const match_doc_ivars = MatchDoc_IVARS(match_doc);
    match_doc_ivars->doc_id = doc_id + ivars->base;

    // F32_NEGINF marks a score that has not been computed yet.
    if (ivars->need_score && match_doc_ivars->score == F32_NEGINF) {
        match_doc_ivars->score = Matcher_Score(ivars->matcher);
    }

    // Fetch values so that cross-segment sorting can work.
    if (ivars->need_values) {
        VArray *values = match_doc_ivars->values;

        for (uint32_t i = 0, max = ivars->num_rules; i < max; i++) {
            SortCache *cache   = ivars->sort_caches[i];
            Obj       *old_val = (Obj*)VA_Delete(values, i);

            if (!cache) {
                /* No sort cache for this rule, so the slot stays empty.
                 * Release any value left over from an earlier use of this
                 * recycled MatchDoc; otherwise the reference obtained from
                 * VA_Delete() would leak. */
                DECREF(old_val);
                continue;
            }

            /* Reuse the old value object as scratch space when there is
             * one.  NOTE(review): SortCache_Value() presumably returns
             * `blank` filled in, or NULL when the doc has no value --
             * confirm against SortCache. */
            int32_t ord = SortCache_Ordinal(cache, doc_id);
            Obj *blank = old_val
                         ? old_val
                         : SortCache_Make_Blank(cache);
            Obj *val = SortCache_Value(cache, ord, blank);
            if (val) { VA_Store(values, i, (Obj*)val); }
            else     { DECREF(blank); }
        }
    }

    // Insert the new MatchDoc.
    ivars->bumped = (MatchDoc*)HitQ_Jostle(ivars->hit_q, (Obj*)match_doc);

    if (ivars->bumped) {
        if (ivars->bumped == match_doc) {
            /* The queue is full, and we have established a threshold for
             * this segment as to what sort of document is definitely not
             * acceptable.  Turn off AUTO_ACCEPT and start actually
             * testing whether hits are competitive.  Note that bubble_doc
             * keeps the doc id as passed in, without `base` added. */
            ivars->bubble_score = match_doc_ivars->score;
            ivars->bubble_doc   = doc_id;
            ivars->actions      = ivars->derived_actions;
        }

        // Recycle.
        MatchDoc_IVARS(ivars->bumped)->score = ivars->need_score
                                               ? F32_NEGINF
                                               : F32_NAN;
    }
    else {
        // The queue isn't full yet, so create a fresh MatchDoc.
        VArray *values = ivars->need_values
                         ? VA_new(ivars->num_rules)
                         : NULL;
        float fake_score = ivars->need_score
                           ? F32_NEGINF
                           : F32_NAN;
        ivars->bumped = MatchDoc_new(INT32_MAX, fake_score, values);
        DECREF(values);
    }
}