static int S_compare_doc_count(const void *va, const void *vb) { SegReader *a = *(SegReader**)va; SegReader *b = *(SegReader**)vb; return SegReader_Doc_Count(a) - SegReader_Doc_Count(b); }
static int S_compare_doc_count(void *context, const void *va, const void *vb) { SegReader *a = *(SegReader**)va; SegReader *b = *(SegReader**)vb; UNUSED_VAR(context); return SegReader_Doc_Count(a) - SegReader_Doc_Count(b); }
/* Pick the SegReaders that should be merged.
 *
 * Works on a shallow copy of the reader's SegReader array.  When `all` is
 * true the full copy is returned untouched.  Otherwise the copy is sorted
 * by ascending doc count and cut down to its sparsely populated prefix: the
 * prefix ends after the last position whose running doc total is still
 * below Math_fibonacci(position + 5), counting positions from zero.
 *
 * `self` is unused.
 */
VArray*
IxManager_segreaders_to_merge(IndexManager *self, PolyReader *reader,
                              bool_t all) {
    VArray *seg_readers = VA_Shallow_Copy(PolyReader_Get_Seg_Readers(reader));
    UNUSED_VAR(self);

    if (!all) {
        const u32_t seg_count   = VA_Get_Size(seg_readers);
        u32_t       docs_so_far = 0;
        u32_t       keep        = 0;
        u32_t       done        = 0; /* Number of readers examined so far. */

        /* Smallest segments first. */
        VA_Sort(seg_readers, S_compare_doc_count);

        /* Walk the sorted readers, remembering how far the sparse run
         * extends. */
        while (done < seg_count) {
            SegReader *seg_reader = (SegReader*)VA_Fetch(seg_readers, done);
            docs_so_far += SegReader_Doc_Count(seg_reader);
            done++;
            /* `done` is the zero-based position plus one, hence the offset
             * of 4 rather than 5. */
            if (docs_so_far < Math_fibonacci(done + 4)) {
                keep = done;
            }
        }

        /* Discard everything past the sparse prefix. */
        VA_Splice(seg_readers, keep, seg_count);
    }

    return seg_readers;
}
// Choose which segments should be recycled (i.e. rewritten).
//
// Only SegReaders whose segment number is greater than `cutoff` are
// considered.  When `optimize` is true, every such candidate is selected.
// Otherwise the candidates are sorted by ascending doc count and two groups
// are selected:
//
//   1. the first `threshold` candidates, where `threshold` is whatever
//      IxManager_Choose_Sparse() returns for the sorted doc counts;
//   2. any remaining candidate for which the deletion count reported by
//      `del_writer`, divided by the segment's doc max, is at least 0.1.
//
// Returns a newly created Vector; each selected SegReader is INCREF'd
// before being stored in it.
Vector*
IxManager_Recycle_IMP(IndexManager *self, PolyReader *reader,
                      DeletionsWriter *del_writer, int64_t cutoff,
                      bool optimize) {
    Vector *seg_readers     = PolyReader_Get_Seg_Readers(reader);
    size_t  num_seg_readers = Vec_Get_Size(seg_readers);
    SegReader **candidates
        = (SegReader**)MALLOCATE(num_seg_readers * sizeof(SegReader*));
    size_t num_candidates = 0;

    // Filter out segments at or below the cutoff.
    for (size_t tick = 0; tick < num_seg_readers; tick++) {
        SegReader *seg_reader = (SegReader*)Vec_Fetch(seg_readers, tick);
        if (SegReader_Get_Seg_Num(seg_reader) <= cutoff) {
            continue;
        }
        candidates[num_candidates] = seg_reader;
        num_candidates++;
    }

    Vector *recyclables = Vec_new(num_candidates);

    if (optimize) {
        // Optimizing: every candidate is recycled.
        for (size_t tick = 0; tick < num_candidates; tick++) {
            Vec_Push(recyclables, INCREF(candidates[tick]));
        }
    }
    else {
        // Smallest segments first, then ask how many of them are sparse.
        qsort(candidates, num_candidates, sizeof(SegReader*),
              S_compare_doc_count);
        int32_t *counts
            = (int32_t*)MALLOCATE(num_candidates * sizeof(int32_t));
        for (uint32_t tick = 0; tick < num_candidates; tick++) {
            counts[tick] = SegReader_Doc_Count(candidates[tick]);
        }
        // The `counts` buffer is handed to I32Arr_new_steal() and is not
        // freed here.
        I32Array *doc_counts = I32Arr_new_steal(counts, num_candidates);
        uint32_t  threshold  = IxManager_Choose_Sparse(self, doc_counts);
        DECREF(doc_counts);

        // The sparse prefix is recycled unconditionally.
        for (uint32_t tick = 0; tick < threshold; tick++) {
            Vec_Store(recyclables, tick, INCREF(candidates[tick]));
        }

        // Beyond the prefix, recycle segments where at least 10% of all
        // docs have been deleted.
        for (uint32_t tick = threshold; tick < num_candidates; tick++) {
            SegReader *seg_reader = candidates[tick];
            String    *seg_name   = SegReader_Get_Seg_Name(seg_reader);
            double     doc_max    = SegReader_Doc_Max(seg_reader);
            double     deleted    = DelWriter_Seg_Del_Count(del_writer,
                                                            seg_name);
            double     deleted_fraction = deleted / doc_max;
            if (deleted_fraction >= 0.1) {
                Vec_Push(recyclables, INCREF(seg_reader));
            }
        }
    }

    FREEMEM(candidates);
    return recyclables;
}
// Choose which segments should be recycled (i.e. rewritten).
//
// Candidates are gathered from the reader's SegReaders using
// S_check_cutoff with `cutoff` as its argument.  When `optimize` is true
// the candidate array itself is returned.  Otherwise the candidates are
// sorted by ascending doc count and two groups are moved into the result:
//
//   1. the first `threshold` candidates, where `threshold` is whatever
//      IxManager_Choose_Sparse() returns for the sorted doc counts;
//   2. any remaining candidate for which the deletion count reported by
//      `del_writer`, divided by the segment's doc max, is at least 0.1.
//
// Elements are taken out of `candidates` with VA_Delete() and either handed
// to the result array or DECREF'd; `candidates` is then DECREF'd.
VArray*
IxManager_recycle(IndexManager *self, PolyReader *reader,
                  DeletionsWriter *del_writer, int64_t cutoff,
                  bool_t optimize) {
    VArray *seg_readers = PolyReader_Get_Seg_Readers(reader);
    VArray *candidates  = VA_Gather(seg_readers, S_check_cutoff, &cutoff);
    VArray *recyclables = VA_new(VA_Get_Size(candidates));
    const uint32_t num_candidates = VA_Get_Size(candidates);

    if (optimize) {
        // Optimizing: every candidate is recycled, so the empty result
        // array is not needed.
        DECREF(recyclables);
        return candidates;
    }

    // Smallest segments first, then ask how many of them are sparse.
    VA_Sort(candidates, S_compare_doc_count, NULL);
    int32_t *counts = (int32_t*)MALLOCATE(num_candidates * sizeof(int32_t));
    for (uint32_t slot = 0; slot < num_candidates; slot++) {
        SegReader *seg_reader
            = (SegReader*)CERTIFY(VA_Fetch(candidates, slot), SEGREADER);
        counts[slot] = SegReader_Doc_Count(seg_reader);
    }
    // The `counts` buffer is handed to I32Arr_new_steal() and is not freed
    // here.
    I32Array *doc_counts = I32Arr_new_steal(counts, num_candidates);
    uint32_t  threshold  = IxManager_Choose_Sparse(self, doc_counts);
    DECREF(doc_counts);

    uint32_t tick = 0;

    // The sparse prefix is moved over unconditionally.
    while (tick < threshold) {
        VA_Store(recyclables, tick, VA_Delete(candidates, tick));
        tick++;
    }

    // `tick` now equals `threshold`.  Beyond the prefix, keep segments
    // where at least 10% of all docs have been deleted and release the
    // rest.
    while (tick < num_candidates) {
        SegReader *seg_reader = (SegReader*)VA_Delete(candidates, tick);
        CharBuf   *seg_name   = SegReader_Get_Seg_Name(seg_reader);
        double     doc_max    = SegReader_Doc_Max(seg_reader);
        double     deleted    = DelWriter_Seg_Del_Count(del_writer, seg_name);
        double     deleted_fraction = deleted / doc_max;
        if (deleted_fraction >= 0.1) {
            VA_Push(recyclables, (Obj*)seg_reader);
        }
        else {
            DECREF(seg_reader);
        }
        tick++;
    }

    DECREF(candidates);
    return recyclables;
}