Exemplo n.º 1
0
static void
test_Equals(TestBatchRunner *runner) {
    ByteBuf *bb = BB_new_bytes("foo", 4); // Include terminating NULL.

    {
        ByteBuf *other = BB_new_bytes("foo", 4);
        TEST_TRUE(runner, BB_Equals(bb, (Obj*)other), "Equals");
        DECREF(other);
    }

    TEST_TRUE(runner, BB_Equals_Bytes(bb, "foo", 4), "Equals_Bytes");
    TEST_FALSE(runner, BB_Equals_Bytes(bb, "foo", 3),
               "Equals_Bytes spoiled by different size");
    TEST_FALSE(runner, BB_Equals_Bytes(bb, "bar", 4),
               "Equals_Bytes spoiled by different content");

    {
        ByteBuf *other = BB_new_bytes("foo", 3);
        TEST_FALSE(runner, BB_Equals(bb, (Obj*)other),
                   "Different size spoils Equals");
        DECREF(other);
    }

    {
        ByteBuf *other = BB_new_bytes("bar", 4);
        TEST_UINT_EQ(runner, BB_Get_Size(bb), BB_Get_Size(other),
                     "same length");
        TEST_FALSE(runner, BB_Equals(bb, (Obj*)other),
                   "Different content spoils Equals");
        DECREF(other);
    }

    DECREF(bb);
}
Exemplo n.º 2
0
static void
test_Equals(TestBatchRunner *runner) {
    ByteBuf *wanted = BB_new_bytes("foo", 4); // Include terminating NULL.
    ByteBuf *got    = BB_new_bytes("foo", 4);

    TEST_TRUE(runner, BB_Equals(wanted, (Obj*)got), "Equals");
    TEST_INT_EQ(runner, BB_Hash_Sum(got), BB_Hash_Sum(wanted), "Hash_Sum");

    TEST_TRUE(runner, BB_Equals_Bytes(got, "foo", 4), "Equals_Bytes");
    TEST_FALSE(runner, BB_Equals_Bytes(got, "foo", 3),
               "Equals_Bytes spoiled by different size");
    TEST_FALSE(runner, BB_Equals_Bytes(got, "bar", 4),
               "Equals_Bytes spoiled by different content");

    BB_Set_Size(got, 3);
    TEST_FALSE(runner, BB_Equals(wanted, (Obj*)got),
               "Different size spoils Equals");
    TEST_FALSE(runner, BB_Hash_Sum(got) == BB_Hash_Sum(wanted),
               "Different size spoils Hash_Sum (probably -- at least this one)");

    BB_Mimic_Bytes(got, "bar", 4);
    TEST_INT_EQ(runner, BB_Get_Size(wanted), BB_Get_Size(got),
                "same length");
    TEST_FALSE(runner, BB_Equals(wanted, (Obj*)got),
               "Different content spoils Equals");

    DECREF(got);
    DECREF(wanted);
}
Exemplo n.º 3
0
static void
S_init_arena(MemoryPool *self, size_t amount) {
    ByteBuf *bb;

    // Indicate which arena we're using at present.
    self->tick++;

    if (self->tick < (int32_t)VA_Get_Size(self->arenas)) {
        // In recycle mode, use previously acquired memory.
        bb = (ByteBuf*)VA_Fetch(self->arenas, self->tick);
        if (amount >= BB_Get_Size(bb)) {
            BB_Grow(bb, amount);
            BB_Set_Size(bb, amount);
        }
    }
    else {
        // In add mode, get more mem from system.
        size_t buf_size = (amount + 1) > self->arena_size
                          ? (amount + 1)
                          : self->arena_size;
        char *ptr = (char*)MALLOCATE(buf_size);
        bb = BB_new_steal_bytes(ptr, buf_size - 1, buf_size);
        VA_Push(self->arenas, (Obj*)bb);
    }

    // Recalculate consumption to take into account blocked off space.
    self->consumed = 0;
    for (int32_t i = 0; i < self->tick; i++) {
        ByteBuf *bb = (ByteBuf*)VA_Fetch(self->arenas, i);
        self->consumed += BB_Get_Size(bb);
    }

    self->buf   = BB_Get_Buf(bb);
    self->limit = self->buf + BB_Get_Size(bb);
}
Exemplo n.º 4
0
void
BBSortEx_feed(BBSortEx *self, void *data) {
    SortEx_feed((SortExternal*)self, data);

    // Flush() if necessary.
    ByteBuf *bytebuf = (ByteBuf*)CERTIFY(*(ByteBuf**)data, BYTEBUF);
    self->mem_consumed += BB_Get_Size(bytebuf);
    if (self->mem_consumed >= self->mem_thresh) {
        BBSortEx_Flush(self);
    }
}
Exemplo n.º 5
0
static void
S_init_arena(MemoryPool *self, size_t amount)
{
    ByteBuf *bb;
    i32_t i;

    /* Indicate which arena we're using at present. */
    self->tick++;

    if (self->tick < (i32_t)VA_Get_Size(self->arenas)) {
        /* In recycle mode, use previously acquired memory. */
        bb = (ByteBuf*)VA_Fetch(self->arenas, self->tick);
        if (amount >= BB_Get_Size(bb)) {
            BB_Grow(bb, amount);
            BB_Set_Size(bb, amount);
        }
    }
    else {
        /* In add mode, get more mem from system. */
        size_t buf_size = (amount + 1) > self->arena_size 
            ? (amount + 1)
            : self->arena_size;
        char *ptr       = MALLOCATE(buf_size, char);
        if (ptr == NULL)
            THROW("Failed to allocate memory");
        bb = BB_new_steal_str(ptr, buf_size - 1, buf_size);
        VA_Push(self->arenas, (Obj*)bb);
    }

    /* Recalculate consumption to take into account blocked off space. */
    self->consumed = 0;
    for (i = 0; i < self->tick; i++) {
        ByteBuf *bb = (ByteBuf*)VA_Fetch(self->arenas, i);
        self->consumed += BB_Get_Size(bb);
    }

    self->buf   = bb->ptr;
    self->limit = BBEND(bb);
}
Exemplo n.º 6
0
void
BBSortEx_Feed_IMP(BBSortEx *self, Obj *item) {
    BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
    BBSortEx_Feed_t super_feed
        = SUPER_METHOD_PTR(BBSORTEX, LUCY_BBSortEx_Feed);
    super_feed(self, item);

    // Flush() if necessary.
    ByteBuf *bytebuf = (ByteBuf*)CERTIFY(item, BYTEBUF);
    ivars->mem_consumed += BB_Get_Size(bytebuf);
    if (ivars->mem_consumed >= ivars->mem_thresh) {
        BBSortEx_Flush(self);
    }
}
Exemplo n.º 7
0
static void
test_Mimic(TestBatchRunner *runner) {
    ByteBuf *a = BB_new_bytes("foo", 3);
    ByteBuf *b = BB_new(0);

    BB_Mimic(b, (Obj*)a);
    TEST_TRUE(runner, BB_Equals(a, (Obj*)b), "Mimic");

    BB_Mimic_Bytes(a, "bar", 4);
    TEST_TRUE(runner, strcmp(BB_Get_Buf(a), "bar") == 0,
              "Mimic_Bytes content");
    TEST_INT_EQ(runner, BB_Get_Size(a), 4, "Mimic_Bytes size");

    BB_Mimic(b, (Obj*)a);
    TEST_TRUE(runner, BB_Equals(a, (Obj*)b), "Mimic");

    DECREF(a);
    DECREF(b);
}
Exemplo n.º 8
0
static TermVector*
S_extract_tv_from_tv_buf(String *field, String *term_text,
                         ByteBuf *tv_buf) {
    TermVector *retval      = NULL;
    const char *posdata     = BB_Get_Buf(tv_buf);
    const char *posdata_end = posdata + BB_Get_Size(tv_buf);
    int32_t    *positions   = NULL;
    int32_t    *starts      = NULL;
    int32_t    *ends        = NULL;
    uint32_t    num_pos     = 0;

    if (posdata != posdata_end) {
        num_pos   = NumUtil_decode_c32(&posdata);
        positions = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
        starts    = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
        ends      = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
    }

    // Expand C32s.
    for (uint32_t i = 0; i < num_pos; i++) {
        positions[i] = NumUtil_decode_c32(&posdata);
        starts[i]    = NumUtil_decode_c32(&posdata);
        ends[i]      = NumUtil_decode_c32(&posdata);
    }

    if (posdata != posdata_end) {
        THROW(ERR, "Bad encoding of posdata");
    }
    else {
        I32Array *posits_map = I32Arr_new_steal(positions, num_pos);
        I32Array *starts_map = I32Arr_new_steal(starts, num_pos);
        I32Array *ends_map   = I32Arr_new_steal(ends, num_pos);
        retval = TV_new(field, term_text, posits_map, starts_map, ends_map);
        DECREF(posits_map);
        DECREF(starts_map);
        DECREF(ends_map);
    }

    return retval;
}
Exemplo n.º 9
0
uint32_t
BBSortEx_Refill_IMP(BBSortEx *self) {
    BBSortExIVARS *const ivars = BBSortEx_IVARS(self);

    // Make sure buffer is empty, then set buffer tick vars.
    if (ivars->buf_max - ivars->buf_tick > 0) {
        THROW(ERR, "Refill called but buffer contains %u32 items",
              ivars->buf_max - ivars->buf_tick);
    }
    ivars->buf_tick = 0;
    ivars->buf_max  = 0;

    // Read in elements.
    while (1) {
        ByteBuf *elem = NULL;

        if (ivars->mem_consumed >= ivars->mem_thresh) {
            ivars->mem_consumed = 0;
            break;
        }
        else if (ivars->external_tick >= VA_Get_Size(ivars->external)) {
            break;
        }
        else {
            elem = (ByteBuf*)VA_Fetch(ivars->external, ivars->external_tick);
            ivars->external_tick++;
            // Should be + sizeof(ByteBuf), but that's ok.
            ivars->mem_consumed += BB_Get_Size(elem);
        }

        if (ivars->buf_max == ivars->buf_cap) {
            BBSortEx_Grow_Buffer(self,
                                 Memory_oversize(ivars->buf_max + 1,
                                                 sizeof(Obj*)));
        }
        ivars->buffer[ivars->buf_max++] = INCREF(elem);
    }

    return ivars->buf_max;
}
Exemplo n.º 10
0
uint32_t
BBSortEx_refill(BBSortEx *self) {
    // Make sure cache is empty, then set cache tick vars.
    if (self->cache_max - self->cache_tick > 0) {
        THROW(ERR, "Refill called but cache contains %u32 items",
              self->cache_max - self->cache_tick);
    }
    self->cache_tick = 0;
    self->cache_max  = 0;

    // Read in elements.
    while (1) {
        ByteBuf *elem = NULL;

        if (self->mem_consumed >= self->mem_thresh) {
            self->mem_consumed = 0;
            break;
        }
        else if (self->external_tick >= VA_Get_Size(self->external)) {
            break;
        }
        else {
            elem = (ByteBuf*)VA_Fetch(self->external, self->external_tick);
            self->external_tick++;
            // Should be + sizeof(ByteBuf), but that's ok.
            self->mem_consumed += BB_Get_Size(elem);
        }

        if (self->cache_max == self->cache_cap) {
            BBSortEx_Grow_Cache(self,
                                Memory_oversize(self->cache_max + 1, self->width));
        }
        Obj **cache = (Obj**)self->cache;
        cache[self->cache_max++] = INCREF(elem);
    }

    return self->cache_max;
}
Exemplo n.º 11
0
void
HLWriter_Add_Segment_IMP(HighlightWriter *self, SegReader *reader,
                         I32Array *doc_map) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);
    int32_t doc_max = SegReader_Doc_Max(reader);

    if (doc_max == 0) {
        // Bail if the supplied segment is empty.
        return;
    }
    else {
        DefaultHighlightReader *hl_reader
            = (DefaultHighlightReader*)CERTIFY(
                  SegReader_Obtain(reader, Class_Get_Name(HIGHLIGHTREADER)),
                  DEFAULTHIGHLIGHTREADER);
        OutStream *dat_out = S_lazy_init(self);
        OutStream *ix_out  = ivars->ix_out;
        int32_t    orig;
        ByteBuf   *bb = BB_new(0);

        for (orig = 1; orig <= doc_max; orig++) {
            // Skip deleted docs.
            if (doc_map && !I32Arr_Get(doc_map, orig)) {
                continue;
            }

            // Write file pointer.
            OutStream_Write_I64(ix_out, OutStream_Tell(dat_out));

            // Copy the raw record.
            DefHLReader_Read_Record(hl_reader, orig, bb);
            OutStream_Write_Bytes(dat_out, BB_Get_Buf(bb), BB_Get_Size(bb));

            BB_Set_Size(bb, 0);
        }
        DECREF(bb);
    }
}
Exemplo n.º 12
0
void
DocWriter_Add_Segment_IMP(DocWriter *self, SegReader *reader,
                          I32Array *doc_map) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    int32_t doc_max = SegReader_Doc_Max(reader);

    if (doc_max == 0) {
        // Bail if the supplied segment is empty.
        return;
    }
    else {
        OutStream *const dat_out = S_lazy_init(self);
        OutStream *const ix_out  = ivars->ix_out;
        ByteBuf   *const buffer  = BB_new(0);
        DefaultDocReader *const doc_reader
            = (DefaultDocReader*)CERTIFY(
                  SegReader_Obtain(reader, VTable_Get_Name(DOCREADER)),
                  DEFAULTDOCREADER);

        for (int32_t i = 1, max = SegReader_Doc_Max(reader); i <= max; i++) {
            if (I32Arr_Get(doc_map, i)) {
                int64_t  start = OutStream_Tell(dat_out);

                // Copy record over.
                DefDocReader_Read_Record(doc_reader, buffer, i);
                char *buf   = BB_Get_Buf(buffer);
                size_t size = BB_Get_Size(buffer);
                OutStream_Write_Bytes(dat_out, buf, size);

                // Write file pointer.
                OutStream_Write_I64(ix_out, start);
            }
        }

        DECREF(buffer);
    }
}
Exemplo n.º 13
0
ByteBuf*
HLWriter_TV_Buf_IMP(HighlightWriter *self, Inversion *inversion) {
    const char *last_text = "";
    size_t      last_len = 0;
    ByteBuf    *tv_buf = BB_new(20 + Inversion_Get_Size(inversion) * 8);
    uint32_t    num_postings = 0;
    Token     **tokens;
    uint32_t    freq;
    UNUSED_VAR(self);

    // Leave space for a c32 indicating the number of postings.
    BB_Set_Size(tv_buf, C32_MAX_BYTES);

    Inversion_Reset(inversion);
    while ((tokens = Inversion_Next_Cluster(inversion, &freq)) != NULL) {
        Token *token = *tokens;
        char *const   token_text = Token_Get_Text(token);
        const int32_t token_len  = Token_Get_Len(token);

        int32_t overlap = StrHelp_overlap(last_text, token_text,
                                          last_len, token_len);
        char *ptr;
        char *orig;
        size_t old_size = BB_Get_Size(tv_buf);
        size_t new_size = old_size
                          + C32_MAX_BYTES      // overlap
                          + C32_MAX_BYTES      // length of string diff
                          + (token_len - overlap)        // diff char data
                          + C32_MAX_BYTES                // num prox
                          + (C32_MAX_BYTES * freq * 3);  // pos data

        // Allocate for worst-case scenario.
        ptr  = BB_Grow(tv_buf, new_size);
        orig = ptr;
        ptr += old_size;

        // Track number of postings.
        num_postings += 1;

        // Append the string diff to the tv_buf.
        NumUtil_encode_c32(overlap, &ptr);
        NumUtil_encode_c32((token_len - overlap), &ptr);
        memcpy(ptr, (token_text + overlap), (token_len - overlap));
        ptr += token_len - overlap;

        // Save text and text_len for comparison next loop.
        last_text = token_text;
        last_len  = token_len;

        // Append the number of positions for this term.
        NumUtil_encode_c32(freq, &ptr);

        do {
            // Add position, start_offset, and end_offset to tv_buf.
            NumUtil_encode_c32(Token_Get_Pos(token), &ptr);
            NumUtil_encode_c32(Token_Get_Start_Offset(token), &ptr);
            NumUtil_encode_c32(Token_Get_End_Offset(token), &ptr);
        } while (--freq && (token = *++tokens));

        // Set new byte length.
        BB_Set_Size(tv_buf, ptr - orig);
    }

    // Go back and start the term vector string with the posting count.
    char *dest = BB_Get_Buf(tv_buf);
    NumUtil_encode_padded_c32(num_postings, &dest);

    return tv_buf;
}
Exemplo n.º 14
0
void
DocWriter_Add_Inverted_Doc_IMP(DocWriter *self, Inverter *inverter,
                               int32_t doc_id) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    OutStream *dat_out    = S_lazy_init(self);
    OutStream *ix_out     = ivars->ix_out;
    uint32_t   num_stored = 0;
    int64_t    start      = OutStream_Tell(dat_out);
    int64_t    expected   = OutStream_Tell(ix_out) / 8;

    // Verify doc id.
    if (doc_id != expected) {
        THROW(ERR, "Expected doc id %i64 but got %i32", expected, doc_id);
    }

    // Write the number of stored fields.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (FType_Stored(type)) { num_stored++; }
    }
    OutStream_Write_C32(dat_out, num_stored);

    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        // Only store fields marked as "stored".
        FieldType *type = Inverter_Get_Type(inverter);
        if (FType_Stored(type)) {
            String *field = Inverter_Get_Field_Name(inverter);
            Obj *value = Inverter_Get_Value(inverter);
            Freezer_serialize_string(field, dat_out);
            switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
                case FType_TEXT: {
                    const char *buf  = Str_Get_Ptr8((String*)value);
                    size_t      size = Str_Get_Size((String*)value);
                    OutStream_Write_C32(dat_out, size);
                    OutStream_Write_Bytes(dat_out, buf, size);
                    break;
                }
                case FType_BLOB: {
                    char   *buf  = BB_Get_Buf((ByteBuf*)value);
                    size_t  size = BB_Get_Size((ByteBuf*)value);
                    OutStream_Write_C32(dat_out, size);
                    OutStream_Write_Bytes(dat_out, buf, size);
                    break;
                }
                case FType_INT32: {
                    int32_t val = Int32_Get_Value((Integer32*)value);
                    OutStream_Write_C32(dat_out, val);
                    break;
                }
                case FType_INT64: {
                    int64_t val = Int64_Get_Value((Integer64*)value);
                    OutStream_Write_C64(dat_out, val);
                    break;
                }
                case FType_FLOAT32: {
                    float val = Float32_Get_Value((Float32*)value);
                    OutStream_Write_F32(dat_out, val);
                    break;
                }
                case FType_FLOAT64: {
                    double val = Float64_Get_Value((Float64*)value);
                    OutStream_Write_F64(dat_out, val);
                    break;
                }
                default:
                    THROW(ERR, "Unrecognized type: %o", type);
            }
        }
    }

    // Write file pointer.
    OutStream_Write_I64(ix_out, start);
}
Exemplo n.º 15
0
ByteBuf*
HLWriter_tv_buf(HighlightWriter *self, Inversion *inversion)
{
    char        *last_text = "";
    size_t       last_len = 0;
    ByteBuf     *tv_buf = BB_new(20 + inversion->size * 8); /* generous */
    u32_t        num_postings = 0;
    char        *dest;
    Token      **tokens;
    u32_t        freq;
    UNUSED_VAR(self); /* heh. */

    /* Leave space for a c32 indicating the number of postings. */
    BB_Set_Size(tv_buf, C32_MAX_BYTES);

    Inversion_Reset(inversion);
    while ( (tokens = Inversion_Next_Cluster(inversion, &freq)) != NULL ) {
        Token *token = *tokens;
        i32_t overlap = StrHelp_string_diff(last_text, token->text, 
            last_len, token->len);
        char *ptr;
        size_t new_size =   BB_Get_Size(tv_buf)
                          + C32_MAX_BYTES      /* overlap */
                          + C32_MAX_BYTES      /* length of string diff */
                          + (token->len - overlap) /* diff char data */
                          + C32_MAX_BYTES                /* num prox */
                          + (C32_MAX_BYTES * freq * 3);  /* pos data */

        /* Allocate for worst-case scenario. */
        BB_Grow(tv_buf, new_size);
        ptr = BBEND(tv_buf);

        /* Track number of postings. */
        num_postings += 1;
        
        /* Append the string diff to the tv_buf. */
        Math_encode_c32(overlap, &ptr);
        Math_encode_c32( (token->len - overlap), &ptr);
        memcpy(ptr, (token->text + overlap), (token->len - overlap));
        ptr += token->len - overlap;

        /* Save text and text_len for comparison next loop. */
        last_text = token->text;
        last_len  = token->len;

        /* Append the number of positions for this term. */
        Math_encode_c32(freq, &ptr);

        do {
            /* Add position, start_offset, and end_offset to tv_buf. */
            Math_encode_c32(token->pos, &ptr);
            Math_encode_c32(token->start_offset, &ptr);
            Math_encode_c32(token->end_offset, &ptr);

        } while (--freq && (token = *++tokens));

        /* Set new byte length. */
        BB_Set_Size(tv_buf, ptr - tv_buf->ptr); 
    }
    
    /* Go back and start the term vector string with the number of postings. */
    dest = tv_buf->ptr;
    Math_encode_padded_c32(num_postings, &dest);

    return tv_buf;
}