/* Finalize sort data for the segment: flush any buffered runs, finish each
 * per-field writer, record per-field metadata (value counts, null ordinals,
 * ordinal bit-widths) on the Segment, and delete the temporary scratch files.
 * Safe to call when no sort data was ever written (bails out early).
 */
void
SortWriter_Finish_IMP(SortWriter *self) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    Vector *const field_writers = ivars->field_writers;

    // If we have no data, bail out.  (temp_ord_out is only created once the
    // first sortable value arrives, so its absence means nothing to finish.)
    if (!ivars->temp_ord_out) {
        return;
    }

    // If we've either flushed or added segments, flush everything so that any
    // one field can use the entire margin up to mem_thresh.
    if (ivars->flush_at_finish) {
        // NOTE(review): iteration starts at 1 in both loops here — field
        // numbers appear to be 1-based with slot 0 unused; confirm against
        // Seg_Field_Name()'s numbering convention.
        for (size_t i = 1, max = Vec_Get_Size(field_writers); i < max; i++) {
            SortFieldWriter *field_writer
                = (SortFieldWriter*)Vec_Fetch(field_writers, i);
            if (field_writer) {
                SortFieldWriter_Flush(field_writer);
            }
        }
    }

    // Close down temp streams before the field writers consume them.
    OutStream_Close(ivars->temp_ord_out);
    OutStream_Close(ivars->temp_ix_out);
    OutStream_Close(ivars->temp_dat_out);

    // Finish each field writer and record its metadata under the field name.
    for (size_t i = 1, max = Vec_Get_Size(field_writers); i < max; i++) {
        // The paired DECREF below suggests Vec_Delete transfers the
        // element's refcount to us — confirm against the Vector API.
        SortFieldWriter *field_writer
            = (SortFieldWriter*)Vec_Delete(field_writers, i);
        if (field_writer) {
            String *field = Seg_Field_Name(ivars->segment, (int32_t)i);
            // Switch the writer from write mode to read/merge mode, then
            // write the final per-field files.
            SortFieldWriter_Flip(field_writer);
            int32_t count = SortFieldWriter_Finish(field_writer);
            Hash_Store(ivars->counts, field,
                       (Obj*)Str_newf("%i32", count));
            int32_t null_ord = SortFieldWriter_Get_Null_Ord(field_writer);
            if (null_ord != -1) {
                // -1 means the field had no null values; only store a null
                // ordinal when one actually exists.
                Hash_Store(ivars->null_ords, field,
                           (Obj*)Str_newf("%i32", null_ord));
            }
            int32_t ord_width = SortFieldWriter_Get_Ord_Width(field_writer);
            Hash_Store(ivars->ord_widths, field,
                       (Obj*)Str_newf("%i32", ord_width));
        }
        DECREF(field_writer);
    }
    Vec_Clear(field_writers);

    // Store metadata on the segment under the "sort" key.
    Seg_Store_Metadata_Utf8(ivars->segment, "sort", 4,
                            (Obj*)SortWriter_Metadata(self));

    // Clean up: remove the temporary scratch files now that the permanent
    // sort files have been written.
    Folder *folder = ivars->folder;
    String *seg_name = Seg_Get_Name(ivars->segment);
    String *ord_path = Str_newf("%o/sort_ord_temp", seg_name);
    Folder_Delete(folder, ord_path);
    DECREF(ord_path);
    String *ix_path = Str_newf("%o/sort_ix_temp", seg_name);
    Folder_Delete(folder, ix_path);
    DECREF(ix_path);
    String *dat_path = Str_newf("%o/sort_dat_temp", seg_name);
    Folder_Delete(folder, dat_path);
    DECREF(dat_path);
}
/* Collect one matching document into the sorting hit queue.
 *
 * Always bumps total_hits; only documents that pass SI_competitive() are
 * inserted.  A single spare MatchDoc (ivars->bumped) is recycled between
 * calls to avoid per-hit allocation: it is filled in, offered to the queue
 * via HitQ_Jostle, and whatever the queue evicts becomes the next spare.
 *
 * doc_id is segment-relative; ivars->base converts it to an index-wide id.
 */
void
SortColl_Collect_IMP(SortCollector *self, int32_t doc_id) {
    SortCollectorIVARS *const ivars = SortColl_IVARS(self);

    // Add to the total number of hits.
    ivars->total_hits++;

    // Collect this hit if it's competitive.
    if (SI_competitive(ivars, doc_id)) {
        MatchDoc *const match_doc = ivars->bumped;
        MatchDocIVARS *const match_doc_ivars = MatchDoc_IVARS(match_doc);
        match_doc_ivars->doc_id = doc_id + ivars->base;

        // CHY_F32_NEGINF marks the recycled MatchDoc's score slot as "not
        // yet computed"; score lazily, only when scoring is required and the
        // slot hasn't already been filled.
        if (ivars->need_score && match_doc_ivars->score == CHY_F32_NEGINF) {
            match_doc_ivars->score = Matcher_Score(ivars->matcher);
        }

        // Fetch values so that cross-segment sorting can work.
        if (ivars->need_values) {
            Vector *values = match_doc_ivars->values;
            for (uint32_t i = 0, max = ivars->num_rules; i < max; i++) {
                SortCache *cache = ivars->sort_caches[i];
                // Discard any stale value left over from the recycled
                // MatchDoc before storing the fresh one.
                Obj *old_val = Vec_Delete(values, i);
                DECREF(old_val);
                if (cache) {
                    int32_t ord = SortCache_Ordinal(cache, doc_id);
                    Obj *val = SortCache_Value(cache, ord);
                    if (val) {
                        // No INCREF here — presumably SortCache_Value
                        // returns an incremented ref that Vec_Store takes
                        // over; confirm against the SortCache API.
                        Vec_Store(values, i, (Obj*)val);
                    }
                }
            }
        }

        // Insert the new MatchDoc.  Jostle returns the evicted element (or
        // NULL if the queue absorbed the insert without eviction).
        ivars->bumped = (MatchDoc*)HitQ_Jostle(ivars->hit_q, (Obj*)match_doc);

        if (ivars->bumped) {
            if (ivars->bumped == match_doc) {
                /* The queue is full, and we have established a threshold for
                 * this segment as to what sort of document is definitely not
                 * acceptable.  Turn off AUTO_ACCEPT and start actually
                 * testing whether hits are competitive. */
                ivars->bubble_score = match_doc_ivars->score;
                ivars->bubble_doc = doc_id;
                ivars->actions = ivars->derived_actions;
            }

            // Recycle: reset the spare's score sentinel (NEGINF = "needs
            // scoring", NAN = "scores unused").
            MatchDoc_IVARS(ivars->bumped)->score = ivars->need_score
                                                   ? CHY_F32_NEGINF
                                                   : CHY_F32_NAN;
        }
        else {
            // The queue isn't full yet, so create a fresh MatchDoc.
            Vector *values = ivars->need_values
                             ? Vec_new(ivars->num_rules)
                             : NULL;
            float fake_score = ivars->need_score
                               ? CHY_F32_NEGINF
                               : CHY_F32_NAN;
            // INT32_MAX doc_id marks the spare as a placeholder until it is
            // filled in by a future competitive hit.
            ivars->bumped = MatchDoc_new(INT32_MAX, fake_score, values);
            DECREF(values);
        }
    }
}