Пример #1
0
static void
test_c32(TestBatch *batch) {
    uint64_t  mins[]   = { 0,   0x4000 - 100, (uint32_t)I32_MAX - 100, U32_MAX - 10 };
    uint64_t  limits[] = { 500, 0x4000 + 100, (uint32_t)I32_MAX + 100, U32_MAX      };
    uint32_t  set_num;
    uint32_t  num_sets  = sizeof(mins) / sizeof(uint64_t);
    size_t    count     = 64;
    uint64_t *ints      = NULL;
    size_t    amount    = count * C32_MAX_BYTES;
    char     *encoded   = (char*)CALLOCATE(amount, sizeof(char));
    char     *target    = encoded;
    char     *limit     = target + amount;

    for (set_num = 0; set_num < num_sets; set_num++) {
        char *skip;
        ints = TestUtils_random_u64s(ints, count,
                                     mins[set_num], limits[set_num]);
        target = encoded;
        for (size_t i = 0; i < count; i++) {
            NumUtil_encode_c32((uint32_t)ints[i], &target);
        }
        target = encoded;
        skip   = encoded;
        for (size_t i = 0; i < count; i++) {
            TEST_INT_EQ(batch, NumUtil_decode_c32(&target), (long)ints[i],
                        "c32 %lu", (long)ints[i]);
            NumUtil_skip_cint(&skip);
            if (target > limit) { THROW(ERR, "overrun"); }
        }
        TEST_TRUE(batch, skip == target, "skip %lu == %lu",
                  (unsigned long)skip, (unsigned long)target);

        target = encoded;
        for (size_t i = 0; i < count; i++) {
            NumUtil_encode_padded_c32((uint32_t)ints[i], &target);
        }
        TEST_TRUE(batch, target == limit,
                  "padded c32 uses 5 bytes (%lu == %lu)", (unsigned long)target,
                  (unsigned long)limit);
        target = encoded;
        skip   = encoded;
        for (size_t i = 0; i < count; i++) {
            TEST_INT_EQ(batch, NumUtil_decode_c32(&target), (long)ints[i],
                        "padded c32 %lu", (long)ints[i]);
            NumUtil_skip_cint(&skip);
            if (target > limit) { THROW(ERR, "overrun"); }
        }
        TEST_TRUE(batch, skip == target, "skip padded %lu == %lu",
                  (unsigned long)skip, (unsigned long)target);
    }

    target = encoded;
    NumUtil_encode_c32(U32_MAX, &target);
    target = encoded;
    TEST_INT_EQ(batch, NumUtil_decode_c32(&target), U32_MAX, "c32 U32_MAX");

    FREEMEM(encoded);
    FREEMEM(ints);
}
Пример #2
0
static TermVector*
S_extract_tv_from_tv_buf(String *field, String *term_text,
                         ByteBuf *tv_buf) {
    TermVector *retval      = NULL;
    const char *posdata     = BB_Get_Buf(tv_buf);
    const char *posdata_end = posdata + BB_Get_Size(tv_buf);
    int32_t    *positions   = NULL;
    int32_t    *starts      = NULL;
    int32_t    *ends        = NULL;
    uint32_t    num_pos     = 0;

    if (posdata != posdata_end) {
        num_pos   = NumUtil_decode_c32(&posdata);
        positions = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
        starts    = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
        ends      = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
    }

    // Expand C32s.
    for (uint32_t i = 0; i < num_pos; i++) {
        positions[i] = NumUtil_decode_c32(&posdata);
        starts[i]    = NumUtil_decode_c32(&posdata);
        ends[i]      = NumUtil_decode_c32(&posdata);
    }

    if (posdata != posdata_end) {
        THROW(ERR, "Bad encoding of posdata");
    }
    else {
        I32Array *posits_map = I32Arr_new_steal(positions, num_pos);
        I32Array *starts_map = I32Arr_new_steal(starts, num_pos);
        I32Array *ends_map   = I32Arr_new_steal(ends, num_pos);
        retval = TV_new(field, term_text, posits_map, starts_map, ends_map);
        DECREF(posits_map);
        DECREF(starts_map);
        DECREF(ends_map);
    }

    return retval;
}
Пример #3
0
void
ScorePost_Read_Record_IMP(ScorePosting *self, InStream *instream) {
    ScorePostingIVARS *const ivars = ScorePost_IVARS(self);
    uint32_t  position = 0;
    const size_t max_start_bytes = (C32_MAX_BYTES * 2) + 1;
    const char *buf = InStream_Buf(instream, max_start_bytes);
    const uint32_t doc_code = NumUtil_decode_c32(&buf);
    const uint32_t doc_delta = doc_code >> 1;

    // Apply delta doc and retrieve freq.
    ivars->doc_id   += doc_delta;
    if (doc_code & 1) {
        ivars->freq = 1;
    }
    else {
        ivars->freq = NumUtil_decode_c32(&buf);
    }

    // Decode boost/norm byte.
    ivars->weight = ivars->norm_decoder[*(uint8_t*)buf];
    buf++;

    // Read positions.
    uint32_t num_prox = ivars->freq;
    if (num_prox > ivars->prox_cap) {
        ivars->prox = (uint32_t*)REALLOCATE(
                         ivars->prox, num_prox * sizeof(uint32_t));
        ivars->prox_cap = num_prox;
    }
    uint32_t *positions = ivars->prox;

    InStream_Advance_Buf(instream, buf);
    buf = InStream_Buf(instream, num_prox * C32_MAX_BYTES);
    while (num_prox--) {
        position += NumUtil_decode_c32(&buf);
        *positions++ = position;
    }

    InStream_Advance_Buf(instream, buf);
}
Пример #4
0
static Hash*
S_extract_tv_cache(ByteBuf *field_buf) {
    Hash       *tv_cache  = Hash_new(0);
    const char *tv_string = BB_Get_Buf(field_buf);
    int32_t     num_terms = NumUtil_decode_c32(&tv_string);
    CharBuf    *text_buf  = CB_new(0);

    // Read the number of highlightable terms in the field.
    for (int32_t i = 0; i < num_terms; i++) {
        size_t   overlap = NumUtil_decode_c32(&tv_string);
        size_t   len     = NumUtil_decode_c32(&tv_string);

        // Decompress the term text.
        CB_Set_Size(text_buf, overlap);
        CB_Cat_Trusted_Utf8(text_buf, tv_string, len);
        tv_string += len;

        // Get positions & offsets string.
        const char *bookmark_ptr  = tv_string;
        int32_t     num_positions = NumUtil_decode_c32(&tv_string);
        while (num_positions--) {
            // Leave nums compressed to save a little mem.
            NumUtil_skip_cint(&tv_string);
            NumUtil_skip_cint(&tv_string);
            NumUtil_skip_cint(&tv_string);
        }
        len = tv_string - bookmark_ptr;

        // Store the $text => $posdata pair in the output hash.
        String *text = CB_To_String(text_buf);
        Hash_Store(tv_cache, (Obj*)text,
                   (Obj*)BB_new_bytes(bookmark_ptr, len));
        DECREF(text);
    }
    DECREF(text_buf);

    return tv_cache;
}