Пример #1
0
// Check that Memory_oversize() reports SIZE_MAX when asked for SIZE_MAX or
// SIZE_MAX - 1 elements, for every element width from 0 through 9.
static void
test_oversize__ceiling(TestBatchRunner *runner) {
    unsigned width = 0;

    while (width < 10) {
        // Request exactly the largest representable count.
        size_t at_limit = Memory_oversize(SIZE_MAX, width);
        TEST_TRUE(runner, at_limit == SIZE_MAX,
                  "Memory_oversize hits ceiling at SIZE_MAX (width %u)", width);

        // Request one less than the largest representable count.
        size_t below_limit = Memory_oversize(SIZE_MAX - 1, width);
        TEST_TRUE(runner, below_limit == SIZE_MAX,
                  "Memory_oversize hits ceiling at SIZE_MAX (width %u)", width);

        width++;
    }
}
Пример #2
0
// Set the bit at index `tick`, growing the vector first when `tick` is at or
// beyond the current capacity.
//
// The requested capacity is computed as a size_t and clamped to UINT32_MAX
// before being narrowed, so a large `tick` can neither wrap the "+ 1" nor
// have the oversized result truncated to a value smaller than `tick`.
void
BitVec_set(BitVector *self, uint32_t tick) {
    if (tick >= self->cap) {
        // Avoid `tick + 1` wrapping to zero when tick == UINT32_MAX, including
        // on hosts where size_t is only 32 bits wide.
        size_t min_cap = tick < UINT32_MAX ? (size_t)tick + 1
                                           : (size_t)UINT32_MAX;
        size_t new_cap = Memory_oversize(min_cap, 0);
        // Memory_oversize() yields a size_t; clamp instead of truncating so
        // the capacity handed to BitVec_Grow never falls below the request.
        if (new_cap > UINT32_MAX) { new_cap = UINT32_MAX; }
        BitVec_Grow(self, (uint32_t)new_cap);
    }
    NumUtil_u1set(self->bits, tick);
}
Пример #3
0
// Exercise the growth behaviour of Memory_oversize() for pointer-sized
// elements.
//
// Part one repeatedly asks for one more element than the previous answer,
// starting from zero, until the answer reaches SIZE_MAX.  It fails if an
// answer is ever smaller than the previous size, or if the running average
// of next_size / size drops below 1.1.
//
// Part two checks that for minimums 1 through 7 the ratio of the answer to
// the minimum is above 1.2.
static void
test_oversize__growth_rate(TestBatchRunner *runner) {
    bool     success             = true;
    uint64_t size                = 0;
    double   growth_count        = 0;
    double   average_growth_rate = 0.0;

    while (size < SIZE_MAX) {
        uint64_t next_size = Memory_oversize((size_t)size + 1, sizeof(void*));
        if (next_size < size) {
            success = false;
            // Both arguments are uint64_t, so use the unsigned conversion;
            // PRId64 would show values above INT64_MAX as negative numbers.
            FAIL(runner, "Asked for %" PRIu64 ", got smaller amount %" PRIu64,
                 size + 1, next_size);
            break;
        }
        if (size > 0) {
            // Skip the first step: a ratio against size 0 is undefined.
            growth_count += 1;
            double growth_rate = CHY_U64_TO_DOUBLE(next_size) /
                                 CHY_U64_TO_DOUBLE(size);
            // Incrementally update the mean of all growth rates seen so far.
            double sum = growth_rate + (growth_count - 1) * average_growth_rate;
            average_growth_rate = sum / growth_count;
            if (average_growth_rate < 1.1) {
                FAIL(runner, "Average growth rate dropped below 1.1x: %f",
                     average_growth_rate);
                success = false;
                break;
            }
        }
        size = next_size;
    }
    TEST_TRUE(runner, growth_count > 0, "Grew %f times", growth_count);
    if (success) {
        // Only report the summary when no failure was already recorded above.
        TEST_TRUE(runner, average_growth_rate > 1.1,
                  "Growth rate of oversize() averages above 1.1: %.3f",
                  average_growth_rate);
    }

    for (size_t minimum = 1; minimum < 8; minimum++) {
        uint64_t next_size = Memory_oversize(minimum, sizeof(void*));
        double growth_rate = CHY_U64_TO_DOUBLE(next_size) / (double)minimum;
        TEST_TRUE(runner, growth_rate > 1.2,
                  "Growth rate is higher for smaller arrays (%u, %.3f)",
                  (unsigned)minimum, growth_rate);
    }
}
Пример #4
0
// Set the bit at index `tick`.  When `tick` is not below the current
// capacity, the vector is first grown to the size suggested by
// Memory_oversize() for `tick + 1` entries.
void
BitVec_Set_IMP(BitVector *self, size_t tick) {
    BitVectorIVARS *const ivars = BitVec_IVARS(self);

    if (ivars->cap <= tick) {
        const size_t wanted = tick + 1;
        BitVec_Grow(self, (size_t)Memory_oversize(wanted, 0));
    }

    NumUtil_u1set(ivars->bits, tick);
}
Пример #5
0
// Append `item` to the end of the buffer, enlarging the buffer beforehand
// when the number of stored items equals its capacity.
void
SortEx_Feed_IMP(SortExternal *self, Obj *item) {
    SortExternalIVARS *const ivars = SortEx_IVARS(self);

    // Buffer is full: ask Memory_oversize() for room for one more pointer.
    if (ivars->buf_cap == ivars->buf_max) {
        const size_t grown = Memory_oversize(ivars->buf_max + 1, sizeof(Obj*));
        SortEx_Grow_Buffer(self, grown);
    }

    ivars->buffer[ivars->buf_max++] = item;
}
Пример #6
0
// Grow the array so that it can hold `add_size` more elements than its
// current size.  Throws if the 32-bit view of the required size is smaller
// than `add_size`; otherwise grows to the Memory_oversize() suggestion,
// capped at UINT32_MAX.
static void
SI_grow_by(VArray *self, uint32_t add_size) {
    const size_t required = self->size + add_size;

    // A truncated sum below the addend indicates the addition wrapped.
    if ((uint32_t)required < add_size) {
        THROW(ERR, "Array grew too large");
    }

    size_t target = Memory_oversize(required, sizeof(Obj*));
    // Keep the capacity representable in the 32 bits passed to VA_Grow.
    target = target > UINT32_MAX ? UINT32_MAX : target;
    VA_Grow(self, (uint32_t)target);
}
Пример #7
0
// Copy one element of `ivars->width` bytes from `data` into the next free
// slot of the cache, enlarging the cache first when it is full.
void
SortEx_Feed_IMP(SortExternal *self, void *data) {
    SortExternalIVARS *const ivars = SortEx_IVARS(self);
    const size_t elem_size = ivars->width;

    // Cache is full: request room for at least one more element.
    if (ivars->cache_cap == ivars->cache_max) {
        const size_t grown = Memory_oversize(ivars->cache_max + 1, elem_size);
        SortEx_Grow_Cache(self, grown);
    }

    // Compute the destination only after any growth has happened.
    uint8_t *const slot = ivars->cache + ivars->cache_max * elem_size;
    memcpy(slot, data, elem_size);
    ivars->cache_max++;
}
Пример #8
0
// Store `token` at the end of the token array.  Throws if the `inverted`
// flag is already set; grows the array first when size has reached capacity.
void
Inversion_Append_IMP(Inversion *self, Token *token) {
    InversionIVARS *const ivars = Inversion_IVARS(self);

    if (ivars->inverted) {
        THROW(ERR, "Can't append tokens after inversion");
    }

    // No free slot left: make room for at least one more Token pointer.
    if (ivars->cap <= ivars->size) {
        const size_t grown = Memory_oversize(ivars->size + 1, sizeof(Token*));
        S_grow(self, grown);
    }

    ivars->tokens[ivars->size++] = token;
}
Пример #9
0
// For element widths 1, 2 and 4 and minimums 0 through 24, check that the
// byte count implied by Memory_oversize() (count * width) is a multiple of
// sizeof(size_t).  Reports a failure and stops at the first violation.
static void
test_oversize__rounding(TestBatchRunner *runner) {
    // Zero acts as the end-of-list marker.
    unsigned widths[] = { 1, 2, 4, 0 };

    for (const unsigned *cursor = widths; *cursor != 0; cursor++) {
        const unsigned width = *cursor;
        unsigned minimum = 0;

        while (minimum < 25) {
            const size_t count = Memory_oversize(minimum, width);
            const size_t bytes = count * width;
            if (bytes % sizeof(size_t) != 0) {
                FAIL(runner, "Rounding failure for %u, width %u",
                     minimum, width);
                return;
            }
            minimum++;
        }
    }

    PASS(runner, "Round allocations up to the size of a pointer");
}
Пример #10
0
// Move elements from `ivars->external` into the buffer until either the
// memory threshold is reached or the external vector is exhausted.
//
// Throws if the buffer still holds unread items.  Returns the number of
// items now in the buffer.
uint32_t
BlobSortEx_Refill_IMP(BlobSortEx *self) {
    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);

    // Refilling is only permitted once everything buffered has been read.
    if (ivars->buf_max - ivars->buf_tick > 0) {
        THROW(ERR, "Refill called but buffer contains %u32 items",
              ivars->buf_max - ivars->buf_tick);
    }
    ivars->buf_tick = 0;
    ivars->buf_max  = 0;

    for (;;) {
        // Memory budget used up: reset the counter and stop.
        if (ivars->mem_consumed >= ivars->mem_thresh) {
            ivars->mem_consumed = 0;
            break;
        }

        // Nothing left in the external vector.
        if (ivars->external_tick >= Vec_Get_Size(ivars->external)) {
            break;
        }

        Blob *blob = (Blob*)Vec_Fetch(ivars->external, ivars->external_tick);
        ivars->external_tick++;
        // Only the payload size is counted, not the Blob object itself.
        ivars->mem_consumed += Blob_Get_Size(blob);

        // Make room for one more pointer when the buffer is full.
        if (ivars->buf_cap == ivars->buf_max) {
            BlobSortEx_Grow_Buffer(self,
                                 Memory_oversize(ivars->buf_max + 1,
                                                 sizeof(Obj*)));
        }

        ivars->buffer[ivars->buf_max] = INCREF(blob);
        ivars->buf_max++;
    }

    return ivars->buf_max;
}
Пример #11
0
// Move elements from `self->external` into the cache until either the
// memory threshold is reached or the external array is exhausted.
//
// Throws if the cache still holds unread items.  Returns the number of
// items now in the cache.
uint32_t
BBSortEx_refill(BBSortEx *self) {
    // Refilling is only permitted once everything cached has been read.
    if (self->cache_max - self->cache_tick > 0) {
        THROW(ERR, "Refill called but cache contains %u32 items",
              self->cache_max - self->cache_tick);
    }
    self->cache_max  = 0;
    self->cache_tick = 0;

    for (;;) {
        // Memory budget used up: reset the counter and stop.
        if (self->mem_consumed >= self->mem_thresh) {
            self->mem_consumed = 0;
            break;
        }

        // Nothing left in the external array.
        if (self->external_tick >= VA_Get_Size(self->external)) {
            break;
        }

        ByteBuf *bytebuf
            = (ByteBuf*)VA_Fetch(self->external, self->external_tick);
        self->external_tick++;
        // Only the payload size is counted, not the ByteBuf object itself.
        self->mem_consumed += BB_Get_Size(bytebuf);

        // Make room for one more element when the cache is full.
        if (self->cache_cap == self->cache_max) {
            BBSortEx_Grow_Cache(self,
                                Memory_oversize(self->cache_max + 1, self->width));
        }

        // Read the cache pointer only after any growth has happened.
        Obj **slots = (Obj**)self->cache;
        slots[self->cache_max] = INCREF(bytebuf);
        self->cache_max++;
    }

    return self->cache_max;
}
Пример #12
0
// Gather one slice from each run into the main buffer, then combine the
// slices pairwise with Sort_merge() until a single slice remains.
//
// self     - the SortExternal whose main buffer receives the elements; also
//            passed to Sort_merge() as the comparison context.
// ivars    - instance variables of `self`.
// endpost  - forwarded to S_find_slice_size() for every run; presumably the
//            upper bound for elements that belong in this batch (confirm
//            against S_find_slice_size).
//
// Throws if the main buffer is not empty on entry.  On return
// ivars->num_slices is reset to 0.
static void
S_absorb_slices(SortExternal *self, SortExternalIVARS *ivars,
                Obj **endpost) {
    uint32_t    num_runs     = Vec_Get_Size(ivars->runs);
    Obj      ***slice_starts = ivars->slice_starts;
    uint32_t   *slice_sizes  = ivars->slice_sizes;
    Class      *klass        = SortEx_get_class(self);
    // Look up the class's Compare method once so it can be handed to
    // Sort_merge() below.
    CFISH_Sort_Compare_t compare
        = (CFISH_Sort_Compare_t)METHOD_PTR(klass, LUCY_SortEx_Compare);

    if (ivars->buf_max != 0) { THROW(ERR, "Can't refill unless empty"); }

    // Move all the elements in range into the main buffer as slices.
    for (uint32_t i = 0; i < num_runs; i++) {
        SortExternal *const run = (SortExternal*)Vec_Fetch(ivars->runs, i);
        SortExternalIVARS *const run_ivars = SortEx_IVARS(run);
        uint32_t slice_size = S_find_slice_size(run, run_ivars, endpost);

        if (slice_size) {
            // Move slice content from run buffer to main buffer, growing the
            // main buffer first if the slice would not fit.
            if (ivars->buf_max + slice_size > ivars->buf_cap) {
                size_t cap = Memory_oversize(ivars->buf_max + slice_size,
                                             sizeof(Obj*));
                SortEx_Grow_Buffer(self, cap);
            }
            memcpy(ivars->buffer + ivars->buf_max,
                   run_ivars->buffer + run_ivars->buf_tick,
                   slice_size * sizeof(Obj*));
            // Advance the run's read position past the copied elements.
            run_ivars->buf_tick += slice_size;
            ivars->buf_max += slice_size;

            // Track number of slices and slice sizes.
            slice_sizes[ivars->num_slices++] = slice_size;
        }
    }

    // Transform slice starts from ticks to pointers.  This is done after all
    // copying so the pointers refer to the buffer as it stands after any
    // growth above.
    uint32_t total = 0;
    for (uint32_t i = 0; i < ivars->num_slices; i++) {
        slice_starts[i] = ivars->buffer + total;
        total += slice_sizes[i];
    }

    // The main buffer now consists of several slices.  Before merging, make
    // sure the scratch area is at least as large as the main buffer's
    // capacity.
    if (ivars->scratch_cap < ivars->buf_cap) {
        ivars->scratch_cap = ivars->buf_cap;
        ivars->scratch = (Obj**)REALLOCATE(
                            ivars->scratch, ivars->scratch_cap * sizeof(Obj*));
    }

    // Exploit previous sorting, rather than sort buffer naively.
    // Each pass merges neighbouring pairs of slices; when the number of
    // slices is odd, the last, unpaired slice is carried over unmerged.
    while (ivars->num_slices > 1) {
        uint32_t i = 0; // index of the next slice to consume
        uint32_t j = 0; // index of the next slice to produce

        while (i < ivars->num_slices) {
            if (ivars->num_slices - i >= 2) {
                // Merge two consecutive slices.  The merged elements are
                // copied from scratch back over the region that starts at the
                // first slice of the pair.
                const uint32_t merged_size = slice_sizes[i] + slice_sizes[i + 1];
                Sort_merge(slice_starts[i], slice_sizes[i],
                           slice_starts[i + 1], slice_sizes[i + 1], ivars->scratch,
                           sizeof(Obj*), compare, self);
                slice_sizes[j]  = merged_size;
                slice_starts[j] = slice_starts[i];
                memcpy(slice_starts[j], ivars->scratch, merged_size * sizeof(Obj*));
                i += 2;
                j += 1;
            }
            else if (ivars->num_slices - i >= 1) {
                // Move single slice pointer.
                slice_sizes[j]  = slice_sizes[i];
                slice_starts[j] = slice_starts[i];
                i += 1;
                j += 1;
            }
        }
        // The slice count for the next pass is the number produced here.
        ivars->num_slices = j;
    }

    ivars->num_slices = 0;
}
Пример #13
0
// Refill the pool's buffer with raw postings read through its lexicon and
// posting list.
//
// Returns 0 immediately when the pool has no lexicon.  Throws if the buffer
// still holds unread items.  Otherwise replaces the pool's MemoryPool with a
// fresh one, reads postings until the lexicon runs out of terms or the new
// MemoryPool's consumption reaches `mem_thresh` (with at least one item
// collected), and returns the number of items placed in the buffer.
uint32_t
PostPool_Refill_IMP(PostingPool *self) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    Lexicon *const     lexicon     = ivars->lexicon;
    PostingList *const plist       = ivars->plist;
    I32Array    *const doc_map     = ivars->doc_map;
    const uint32_t     mem_thresh  = ivars->mem_thresh;
    const int32_t      doc_base    = ivars->doc_base;
    uint32_t           num_elems   = 0; // number of items recovered
    String            *term_text   = NULL;

    // Without a lexicon there is nothing to read; otherwise start from the
    // lexicon's current term.
    if (ivars->lexicon == NULL) { return 0; }
    else { term_text = (String*)Lex_Get_Term(lexicon); }

    // Make sure buffer is empty.
    if (ivars->buf_max - ivars->buf_tick > 0) {
        THROW(ERR, "Refill called but buffer contains %u32 items",
              ivars->buf_max - ivars->buf_tick);
    }
    ivars->buf_max  = 0;
    ivars->buf_tick = 0;

    // Ditch old MemoryPool and get another.
    DECREF(ivars->mem_pool);
    ivars->mem_pool = MemPool_new(0);
    MemoryPool *const mem_pool = ivars->mem_pool;
    MemoryPoolIVARS *const mem_pool_ivars = MemPool_IVARS(mem_pool);


    while (1) {
        // post_count tracks how many postings remain for the current term;
        // when it reaches zero, advance to the next term.
        if (ivars->post_count == 0) {
            // Read a term.
            if (Lex_Next(lexicon)) {
                ivars->post_count = Lex_Doc_Freq(lexicon);
                term_text = (String*)Lex_Get_Term(lexicon);
                if (term_text && !Obj_Is_A((Obj*)term_text, STRING)) {
                    THROW(ERR, "Only String terms are supported for now");
                }
                // Start the new term with the doc id set to doc_base.
                Posting *posting = PList_Get_Posting(plist);
                Post_Set_Doc_ID(posting, doc_base);
                ivars->last_doc_id = doc_base;
            }
            // Bail if we've read everything in this run.
            else {
                break;
            }
        }

        // Bail if we've hit the ceiling for this run's buffer, but only once
        // at least one element has been collected.
        if (mem_pool_ivars->consumed >= mem_thresh && num_elems > 0) {
            break;
        }

        // Read a posting from the input stream.
        RawPosting *rawpost
            = PList_Read_Raw(plist, ivars->last_doc_id, term_text, mem_pool);
        RawPostingIVARS *const rawpost_ivars = RawPost_IVARS(rawpost);
        ivars->last_doc_id = rawpost_ivars->doc_id;
        ivars->post_count--;

        // Skip deletions: when a doc map is present, a posting whose doc id
        // maps to 0 is dropped; otherwise its doc id is replaced by the
        // mapped value.
        if (doc_map != NULL) {
            const int32_t remapped
                = I32Arr_Get(doc_map, rawpost_ivars->doc_id - doc_base);
            if (!remapped) {
                continue;
            }
            rawpost_ivars->doc_id = remapped;
        }

        // Add to the run's buffer, growing it first when it is full.
        if (num_elems >= ivars->buf_cap) {
            size_t new_cap = Memory_oversize(num_elems + 1, sizeof(Obj*));
            PostPool_Grow_Buffer(self, new_cap);
        }
        ivars->buffer[num_elems] = (Obj*)rawpost;
        num_elems++;
    }

    // Record how many items were buffered and reset the read position.
    ivars->buf_max   = num_elems;
    ivars->buf_tick  = 0;

    return num_elems;
}