Example #1
0
static void
test_c32(TestBatch *batch) {
    uint64_t  mins[]   = { 0,   0x4000 - 100, (uint32_t)I32_MAX - 100, U32_MAX - 10 };
    uint64_t  limits[] = { 500, 0x4000 + 100, (uint32_t)I32_MAX + 100, U32_MAX      };
    uint32_t  set_num;
    uint32_t  num_sets  = sizeof(mins) / sizeof(uint64_t);
    size_t    count     = 64;
    uint64_t *ints      = NULL;
    size_t    amount    = count * C32_MAX_BYTES;
    char     *encoded   = (char*)CALLOCATE(amount, sizeof(char));
    char     *target    = encoded;
    char     *limit     = target + amount;

    for (set_num = 0; set_num < num_sets; set_num++) {
        char *skip;
        ints = TestUtils_random_u64s(ints, count,
                                     mins[set_num], limits[set_num]);
        target = encoded;
        for (size_t i = 0; i < count; i++) {
            NumUtil_encode_c32((uint32_t)ints[i], &target);
        }
        target = encoded;
        skip   = encoded;
        for (size_t i = 0; i < count; i++) {
            TEST_INT_EQ(batch, NumUtil_decode_c32(&target), (long)ints[i],
                        "c32 %lu", (long)ints[i]);
            NumUtil_skip_cint(&skip);
            if (target > limit) { THROW(ERR, "overrun"); }
        }
        TEST_TRUE(batch, skip == target, "skip %lu == %lu",
                  (unsigned long)skip, (unsigned long)target);

        target = encoded;
        for (size_t i = 0; i < count; i++) {
            NumUtil_encode_padded_c32((uint32_t)ints[i], &target);
        }
        TEST_TRUE(batch, target == limit,
                  "padded c32 uses 5 bytes (%lu == %lu)", (unsigned long)target,
                  (unsigned long)limit);
        target = encoded;
        skip   = encoded;
        for (size_t i = 0; i < count; i++) {
            TEST_INT_EQ(batch, NumUtil_decode_c32(&target), (long)ints[i],
                        "padded c32 %lu", (long)ints[i]);
            NumUtil_skip_cint(&skip);
            if (target > limit) { THROW(ERR, "overrun"); }
        }
        TEST_TRUE(batch, skip == target, "skip padded %lu == %lu",
                  (unsigned long)skip, (unsigned long)target);
    }

    target = encoded;
    NumUtil_encode_c32(U32_MAX, &target);
    target = encoded;
    TEST_INT_EQ(batch, NumUtil_decode_c32(&target), U32_MAX, "c32 U32_MAX");

    FREEMEM(encoded);
    FREEMEM(ints);
}
Example #2
0
ByteBuf*
HLWriter_TV_Buf_IMP(HighlightWriter *self, Inversion *inversion) {
    const char *last_text = "";
    size_t      last_len = 0;
    ByteBuf    *tv_buf = BB_new(20 + Inversion_Get_Size(inversion) * 8);
    uint32_t    num_postings = 0;
    Token     **tokens;
    uint32_t    freq;
    UNUSED_VAR(self);

    // Leave space for a c32 indicating the number of postings.
    BB_Set_Size(tv_buf, C32_MAX_BYTES);

    Inversion_Reset(inversion);
    while ((tokens = Inversion_Next_Cluster(inversion, &freq)) != NULL) {
        Token *token = *tokens;
        char *const   token_text = Token_Get_Text(token);
        const int32_t token_len  = Token_Get_Len(token);

        int32_t overlap = StrHelp_overlap(last_text, token_text,
                                          last_len, token_len);
        char *ptr;
        char *orig;
        size_t old_size = BB_Get_Size(tv_buf);
        size_t new_size = old_size
                          + C32_MAX_BYTES      // overlap
                          + C32_MAX_BYTES      // length of string diff
                          + (token_len - overlap)        // diff char data
                          + C32_MAX_BYTES                // num prox
                          + (C32_MAX_BYTES * freq * 3);  // pos data

        // Allocate for worst-case scenario.
        ptr  = BB_Grow(tv_buf, new_size);
        orig = ptr;
        ptr += old_size;

        // Track number of postings.
        num_postings += 1;

        // Append the string diff to the tv_buf.
        NumUtil_encode_c32(overlap, &ptr);
        NumUtil_encode_c32((token_len - overlap), &ptr);
        memcpy(ptr, (token_text + overlap), (token_len - overlap));
        ptr += token_len - overlap;

        // Save text and text_len for comparison next loop.
        last_text = token_text;
        last_len  = token_len;

        // Append the number of positions for this term.
        NumUtil_encode_c32(freq, &ptr);

        do {
            // Add position, start_offset, and end_offset to tv_buf.
            NumUtil_encode_c32(Token_Get_Pos(token), &ptr);
            NumUtil_encode_c32(Token_Get_Start_Offset(token), &ptr);
            NumUtil_encode_c32(Token_Get_End_Offset(token), &ptr);
        } while (--freq && (token = *++tokens));

        // Set new byte length.
        BB_Set_Size(tv_buf, ptr - orig);
    }

    // Go back and start the term vector string with the posting count.
    char *dest = BB_Get_Buf(tv_buf);
    NumUtil_encode_padded_c32(num_postings, &dest);

    return tv_buf;
}