/**
 * Check that Memory_oversize() returns exactly SIZE_MAX when asked for
 * SIZE_MAX or SIZE_MAX - 1 elements, for every element width from 0 to 9.
 */
static void
test_oversize__ceiling(TestBatchRunner *runner) {
    // Requests that must both land on the ceiling, in the order checked.
    const size_t requests[2] = { SIZE_MAX, SIZE_MAX - 1 };

    unsigned width = 0;
    while (width < 10) {
        for (int i = 0; i < 2; i++) {
            size_t got = Memory_oversize(requests[i], width);
            TEST_TRUE(runner, got == SIZE_MAX,
                      "Memory_oversize hits ceiling at SIZE_MAX (width %u)",
                      width);
        }
        width++;
    }
}
/**
 * Set the bit at index `tick`.
 *
 * When `tick` is at or beyond the current capacity, the vector is grown
 * first, to an oversized capacity obtained from Memory_oversize().
 *
 * @param self The bit vector to modify.
 * @param tick Index of the bit to set.
 */
void
BitVec_set(BitVector *self, uint32_t tick) {
    if (tick >= self->cap) {
        // Compute the minimum capacity without letting `tick + 1` wrap
        // around in 32-bit arithmetic.
        size_t min_cap = tick < UINT32_MAX ? (size_t)tick + 1 : UINT32_MAX;

        // Memory_oversize() returns a size_t, which may exceed what a
        // uint32_t capacity can hold; clamp rather than truncate, since a
        // truncating cast could yield a capacity smaller than requested.
        size_t new_cap = Memory_oversize(min_cap, 0);
        if (new_cap > UINT32_MAX) {
            new_cap = UINT32_MAX;
        }

        // NOTE(review): a uint32_t capacity can never strictly exceed
        // tick == UINT32_MAX -- confirm BitVec_Grow allocates enough bytes
        // to hold that index, or that callers never pass it.
        BitVec_Grow(self, (uint32_t)new_cap);
    }
    NumUtil_u1set(self->bits, tick);
}
/**
 * Exercise the growth behavior of Memory_oversize() for pointer-sized
 * elements.
 *
 * Starting from zero, repeatedly ask for one more element than the previous
 * result until SIZE_MAX is reached.  Along the way:
 *   - fail if a result is ever smaller than the previous size;
 *   - maintain a running mean of the per-step growth ratio and fail as soon
 *     as it drops below 1.1.
 * Afterwards, check that requests for 1..7 elements are oversized by a
 * ratio greater than 1.2.
 */
static void
test_oversize__growth_rate(TestBatchRunner *runner) {
    bool     success             = true;
    uint64_t size                = 0;
    double   growth_count        = 0;
    double   average_growth_rate = 0.0;

    while (size < SIZE_MAX) {
        uint64_t next_size
            = Memory_oversize((size_t)size + 1, sizeof(void*));
        if (next_size < size) {
            success = false;
            // Both arguments are uint64_t, so they need the unsigned
            // conversion macro; the signed one would misprint values above
            // INT64_MAX.
            FAIL(runner,
                 "Asked for %" PRIu64 ", got smaller amount %" PRIu64,
                 size + 1, next_size);
            break;
        }
        if (size > 0) {
            growth_count += 1;
            double growth_rate = CHY_U64_TO_DOUBLE(next_size)
                                 / CHY_U64_TO_DOUBLE(size);
            // Incrementally update the mean of all growth ratios so far.
            double sum = growth_rate
                         + (growth_count - 1) * average_growth_rate;
            average_growth_rate = sum / growth_count;
            if (average_growth_rate < 1.1) {
                FAIL(runner, "Average growth rate dropped below 1.1x: %f",
                     average_growth_rate);
                success = false;
                break;
            }
        }
        size = next_size;
    }
    TEST_TRUE(runner, growth_count > 0, "Grew %f times", growth_count);
    if (success) {
        TEST_TRUE(runner, average_growth_rate > 1.1,
                  "Growth rate of oversize() averages above 1.1: %.3f",
                  average_growth_rate);
    }

    for (size_t minimum = 1; minimum < 8; minimum++) {
        uint64_t next_size = Memory_oversize(minimum, sizeof(void*));
        double growth_rate = CHY_U64_TO_DOUBLE(next_size) / (double)minimum;
        TEST_TRUE(runner, growth_rate > 1.2,
                  "Growth rate is higher for smaller arrays (%u, %.3f)",
                  (unsigned)minimum, growth_rate);
    }
}
/**
 * Set the bit at index `tick`, growing the vector beforehand whenever
 * `tick` is not below the current capacity.
 *
 * @param self The bit vector to modify.
 * @param tick Index of the bit to set.
 */
void
BitVec_Set_IMP(BitVector *self, size_t tick) {
    BitVectorIVARS *const ivars = BitVec_IVARS(self);

    // NOTE(review): `tick + 1` wraps to 0 when tick == SIZE_MAX -- confirm
    // callers never pass that value.
    const int needs_growth = !(tick < ivars->cap);
    if (needs_growth) {
        BitVec_Grow(self, (size_t)Memory_oversize(tick + 1, 0));
    }

    NumUtil_u1set(ivars->bits, tick);
}
/**
 * Append `item` to the in-memory buffer, enlarging the buffer first when it
 * is exactly full.
 *
 * @param self The sort pool receiving the item.
 * @param item The object pointer stored in the next free buffer slot.
 */
void
SortEx_Feed_IMP(SortExternal *self, Obj *item) {
    SortExternalIVARS *const ivars = SortEx_IVARS(self);

    // Buffer full: ask for room for at least one more object pointer.
    if (ivars->buf_cap == ivars->buf_max) {
        SortEx_Grow_Buffer(self, Memory_oversize(ivars->buf_max + 1,
                                                 sizeof(Obj*)));
    }

    ivars->buffer[ivars->buf_max] = item;
    ivars->buf_max += 1;
}
/**
 * Grow the array so that it can hold `add_size` more elements than its
 * current size.
 *
 * Throws if the 32-bit truncation of the required size is smaller than
 * `add_size`.  The oversized capacity is capped at UINT32_MAX before being
 * handed to VA_Grow().
 */
static void
SI_grow_by(VArray *self, uint32_t add_size) {
    const size_t required = self->size + add_size;

    // Check for overflow.
    if ((uint32_t)required < add_size) {
        THROW(ERR, "Array grew too large");
    }

    const size_t padded  = Memory_oversize(required, sizeof(Obj*));
    const size_t clamped = padded > UINT32_MAX ? UINT32_MAX : padded;
    VA_Grow(self, (uint32_t)clamped);
}
/**
 * Copy one fixed-width element from `data` into the next free cache slot,
 * enlarging the cache first when it is exactly full.
 *
 * @param self The sort pool receiving the element.
 * @param data Source of the `ivars->width` bytes to copy.
 */
void
SortEx_Feed_IMP(SortExternal *self, void *data) {
    SortExternalIVARS *const ivars = SortEx_IVARS(self);
    const size_t width = ivars->width;

    // Cache full: ask for room for at least one more element.
    if (ivars->cache_cap == ivars->cache_max) {
        SortEx_Grow_Cache(self, Memory_oversize(ivars->cache_max + 1, width));
    }

    // Locate the slot only after any growth, then copy the element in.
    uint8_t *slot = ivars->cache + ivars->cache_max * width;
    memcpy(slot, data, width);
    ivars->cache_max += 1;
}
/**
 * Store `token` in the next free slot of the token array.
 *
 * Throws if the Inversion has already been inverted.  The token array is
 * enlarged first when the current size has reached the capacity.
 *
 * @param self  The Inversion receiving the token.
 * @param token The token pointer to store.
 */
void
Inversion_Append_IMP(Inversion *self, Token *token) {
    InversionIVARS *const ivars = Inversion_IVARS(self);

    if (ivars->inverted) {
        THROW(ERR, "Can't append tokens after inversion");
    }

    const int is_full = !(ivars->size < ivars->cap);
    if (is_full) {
        S_grow(self, Memory_oversize(ivars->size + 1, sizeof(Token*)));
    }

    ivars->tokens[ivars->size] = token;
    ivars->size += 1;
}
/**
 * For element widths 1, 2 and 4 and requested counts 0..24, check that the
 * byte size implied by Memory_oversize() is a multiple of sizeof(size_t).
 * Reports a failure and returns at the first violation; otherwise records a
 * single pass.
 */
static void
test_oversize__rounding(TestBatchRunner *runner) {
    // Zero-terminated list of widths to probe.
    unsigned widths[] = { 1, 2, 4, 0 };

    for (const unsigned *w = widths; *w != 0; w++) {
        const unsigned width = *w;
        unsigned i = 0;
        while (i < 25) {
            size_t count = Memory_oversize(i, width);
            size_t bytes = count * width;
            if (bytes % sizeof(size_t) != 0) {
                FAIL(runner, "Rounding failure for %u, width %u", i, width);
                return;
            }
            i++;
        }
    }

    PASS(runner, "Round allocations up to the size of a pointer");
}
/**
 * Refill the in-memory buffer from the `external` vector.
 *
 * Throws if the buffer still holds unconsumed items.  Elements are pulled
 * from `external` starting at `external_tick` until either the memory
 * threshold has been reached (the consumption counter is then reset) or
 * `external` is exhausted.  Each element placed in the buffer is INCREF'd.
 *
 * @return the number of items now in the buffer.
 */
uint32_t
BlobSortEx_Refill_IMP(BlobSortEx *self) {
    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);

    // Make sure buffer is empty, then set buffer tick vars.
    if (ivars->buf_max - ivars->buf_tick > 0) {
        THROW(ERR, "Refill called but buffer contains %u32 items",
              ivars->buf_max - ivars->buf_tick);
    }
    ivars->buf_tick = 0;
    ivars->buf_max  = 0;

    // Read in elements.
    for (;;) {
        // Stop once enough memory has been consumed for this batch.
        if (ivars->mem_consumed >= ivars->mem_thresh) {
            ivars->mem_consumed = 0;
            break;
        }
        // Stop when the external source has nothing left.
        if (ivars->external_tick >= Vec_Get_Size(ivars->external)) {
            break;
        }

        Blob *elem = (Blob*)Vec_Fetch(ivars->external, ivars->external_tick);
        ivars->external_tick++;
        // Should be + sizeof(Blob), but that's ok.
        ivars->mem_consumed += Blob_Get_Size(elem);

        if (ivars->buf_max == ivars->buf_cap) {
            BlobSortEx_Grow_Buffer(self,
                                   Memory_oversize(ivars->buf_max + 1,
                                                   sizeof(Obj*)));
        }
        ivars->buffer[ivars->buf_max] = INCREF(elem);
        ivars->buf_max++;
    }

    return ivars->buf_max;
}
/**
 * Refill the in-memory cache from the `external` array.
 *
 * Throws if the cache still holds unconsumed items.  Elements are pulled
 * from `external` starting at `external_tick` until either the memory
 * threshold has been reached (the consumption counter is then reset) or
 * `external` is exhausted.  Each element placed in the cache is INCREF'd.
 *
 * @return the number of items now in the cache.
 */
uint32_t
BBSortEx_refill(BBSortEx *self) {
    // Make sure cache is empty, then set cache tick vars.
    if (self->cache_max - self->cache_tick > 0) {
        THROW(ERR, "Refill called but cache contains %u32 items",
              self->cache_max - self->cache_tick);
    }
    self->cache_tick = 0;
    self->cache_max  = 0;

    // Read in elements.
    for (;;) {
        // Stop once enough memory has been consumed for this batch.
        if (self->mem_consumed >= self->mem_thresh) {
            self->mem_consumed = 0;
            break;
        }
        // Stop when the external source has nothing left.
        if (self->external_tick >= VA_Get_Size(self->external)) {
            break;
        }

        ByteBuf *elem = (ByteBuf*)VA_Fetch(self->external,
                                           self->external_tick);
        self->external_tick++;
        // Should be + sizeof(ByteBuf), but that's ok.
        self->mem_consumed += BB_Get_Size(elem);

        if (self->cache_max == self->cache_cap) {
            BBSortEx_Grow_Cache(self,
                                Memory_oversize(self->cache_max + 1,
                                                self->width));
        }

        // Fetch the cache pointer only after any growth.
        Obj **slots = (Obj**)self->cache;
        slots[self->cache_max] = INCREF(elem);
        self->cache_max++;
    }

    return self->cache_max;
}
/**
 * Pull a slice of elements from each run into the (empty) main buffer and
 * merge the slices into a single sorted sequence.
 *
 * For every run in `ivars->runs`, S_find_slice_size() determines how many
 * elements to take relative to `endpost`; those elements are copied to the
 * end of the main buffer and the run's `buf_tick` is advanced past them.
 * The slices are then merged pairwise, using `ivars->scratch` as the merge
 * target, until only one remains.
 *
 * Throws if the main buffer is not empty on entry.
 *
 * @param self    The sort pool whose main buffer is filled.
 * @param ivars   Instance variables of `self`.
 * @param endpost Passed through to S_find_slice_size() for each run.
 */
static void
S_absorb_slices(SortExternal *self, SortExternalIVARS *ivars,
                Obj **endpost) {
    uint32_t    num_runs     = Vec_Get_Size(ivars->runs);
    Obj      ***slice_starts = ivars->slice_starts;
    uint32_t   *slice_sizes  = ivars->slice_sizes;
    Class      *klass        = SortEx_get_class(self);
    CFISH_Sort_Compare_t compare
        = (CFISH_Sort_Compare_t)METHOD_PTR(klass, LUCY_SortEx_Compare);

    if (ivars->buf_max != 0) { THROW(ERR, "Can't refill unless empty"); }

    // Move all the elements in range into the main buffer as slices.
    for (uint32_t i = 0; i < num_runs; i++) {
        SortExternal *const run = (SortExternal*)Vec_Fetch(ivars->runs, i);
        SortExternalIVARS *const run_ivars = SortEx_IVARS(run);
        uint32_t slice_size = S_find_slice_size(run, run_ivars, endpost);

        if (slice_size) {
            // Move slice content from run buffer to main buffer.
            if (ivars->buf_max + slice_size > ivars->buf_cap) {
                size_t cap = Memory_oversize(ivars->buf_max + slice_size,
                                             sizeof(Obj*));
                SortEx_Grow_Buffer(self, cap);
            }
            memcpy(ivars->buffer + ivars->buf_max,
                   run_ivars->buffer + run_ivars->buf_tick,
                   slice_size * sizeof(Obj*));
            run_ivars->buf_tick += slice_size;
            ivars->buf_max += slice_size;

            // Track number of slices and slice sizes.
            slice_sizes[ivars->num_slices++] = slice_size;
        }
    }

    // Transform slice starts from ticks to pointers.  This happens only
    // after all copying, once the buffer is no longer being grown.
    uint32_t total = 0;
    for (uint32_t i = 0; i < ivars->num_slices; i++) {
        slice_starts[i] = ivars->buffer + total;
        total += slice_sizes[i];
    }

    // The main buffer now consists of several slices.  Sort the main buffer,
    // but exploit the fact that each slice is already sorted.
    if (ivars->scratch_cap < ivars->buf_cap) {
        ivars->scratch_cap = ivars->buf_cap;
        ivars->scratch = (Obj**)REALLOCATE(
                             ivars->scratch,
                             ivars->scratch_cap * sizeof(Obj*));
    }

    // Exploit previous sorting, rather than sort buffer naively.  Each pass
    // merges consecutive pairs of slices; when the number of slices is odd,
    // the trailing slice is carried over to the next pass unmerged.
    while (ivars->num_slices > 1) {
        uint32_t i = 0;
        uint32_t j = 0;

        while (i < ivars->num_slices) {
            if (ivars->num_slices - i >= 2) {
                // Merge two consecutive slices into scratch, then copy the
                // result back over the space the two slices occupied.
                const uint32_t merged_size
                    = slice_sizes[i] + slice_sizes[i + 1];
                Sort_merge(slice_starts[i], slice_sizes[i],
                           slice_starts[i + 1], slice_sizes[i + 1],
                           ivars->scratch, sizeof(Obj*), compare, self);
                slice_sizes[j]  = merged_size;
                slice_starts[j] = slice_starts[i];
                memcpy(slice_starts[j], ivars->scratch,
                       merged_size * sizeof(Obj*));
                i += 2;
                j += 1;
            }
            else {
                // Exactly one slice is left over: move single slice pointer.
                slice_sizes[j]  = slice_sizes[i];
                slice_starts[j] = slice_starts[i];
                i += 1;
                j += 1;
            }
        }
        ivars->num_slices = j;
    }

    ivars->num_slices = 0;
}
/**
 * Refill this pool's buffer with raw postings read through its lexicon and
 * posting list.
 *
 * Returns 0 immediately when the pool has no lexicon.  Throws if the buffer
 * still holds unconsumed items.  The previous MemoryPool is released and
 * replaced with a fresh one, out of which the raw postings are read.
 * Reading stops when the lexicon has no further terms, or when the memory
 * pool's consumption has reached `mem_thresh` and at least one posting has
 * been collected.  When a doc map is present, postings whose doc id maps to
 * 0 are skipped and the rest are renumbered.
 *
 * @return the number of postings placed in the buffer.
 */
uint32_t
PostPool_Refill_IMP(PostingPool *self) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    Lexicon *const lexicon = ivars->lexicon;

    // Without a lexicon there is nothing to read.
    if (lexicon == NULL) {
        return 0;
    }

    PostingList *const plist      = ivars->plist;
    I32Array    *const doc_map    = ivars->doc_map;
    const uint32_t     mem_thresh = ivars->mem_thresh;
    const int32_t      doc_base   = ivars->doc_base;
    String            *term_text  = (String*)Lex_Get_Term(lexicon);
    uint32_t           count      = 0; // number of items recovered

    // Make sure buffer is empty.
    if (ivars->buf_max - ivars->buf_tick > 0) {
        THROW(ERR, "Refill called but buffer contains %u32 items",
              ivars->buf_max - ivars->buf_tick);
    }
    ivars->buf_max  = 0;
    ivars->buf_tick = 0;

    // Ditch old MemoryPool and get another.
    DECREF(ivars->mem_pool);
    ivars->mem_pool = MemPool_new(0);
    MemoryPool *const mem_pool = ivars->mem_pool;
    MemoryPoolIVARS *const mem_pool_ivars = MemPool_IVARS(mem_pool);

    for (;;) {
        if (ivars->post_count == 0) {
            // Bail if we've read everything in this run.
            if (!Lex_Next(lexicon)) {
                break;
            }

            // Read a term.
            ivars->post_count = Lex_Doc_Freq(lexicon);
            term_text = (String*)Lex_Get_Term(lexicon);
            if (term_text && !Obj_Is_A((Obj*)term_text, STRING)) {
                THROW(ERR, "Only String terms are supported for now");
            }
            Posting *posting = PList_Get_Posting(plist);
            Post_Set_Doc_ID(posting, doc_base);
            ivars->last_doc_id = doc_base;
        }

        // Bail if we've hit the ceiling for this run's buffer.
        if (count > 0 && mem_pool_ivars->consumed >= mem_thresh) {
            break;
        }

        // Read a posting from the input stream.
        RawPosting *rawpost = PList_Read_Raw(plist, ivars->last_doc_id,
                                             term_text, mem_pool);
        RawPostingIVARS *const rawpost_ivars = RawPost_IVARS(rawpost);
        ivars->last_doc_id = rawpost_ivars->doc_id;
        ivars->post_count--;

        // Skip deletions.
        if (doc_map != NULL) {
            const int32_t remapped
                = I32Arr_Get(doc_map, rawpost_ivars->doc_id - doc_base);
            if (!remapped) {
                continue;
            }
            rawpost_ivars->doc_id = remapped;
        }

        // Add to the run's buffer.
        if (count >= ivars->buf_cap) {
            PostPool_Grow_Buffer(self,
                                 Memory_oversize(count + 1, sizeof(Obj*)));
        }
        ivars->buffer[count] = (Obj*)rawpost;
        count++;
    }

    // Reset the buffer array position and length; remember file pos.
    ivars->buf_max  = count;
    ivars->buf_tick = 0;

    return count;
}