// Round-trip checks for the C32 compressed-integer helpers.
//
// For each of several value ranges, a batch of random integers is written
// with NumUtil_encode_c32 and again with NumUtil_encode_padded_c32.  Each
// encoding is then read back with NumUtil_decode_c32 while a second cursor
// is advanced over the same bytes with NumUtil_skip_cint; both cursors must
// end up at the same address.  Finally U32_MAX is round-tripped on its own.
static void
test_c32(TestBatch *batch) {
    uint64_t range_lo[] = { 0, 0x4000 - 100, (uint32_t)I32_MAX - 100, U32_MAX - 10 };
    uint64_t range_hi[] = { 500, 0x4000 + 100, (uint32_t)I32_MAX + 100, U32_MAX };
    const uint32_t num_ranges = sizeof(range_lo) / sizeof(range_lo[0]);
    const size_t   num_ints   = 64;

    // Scratch buffer sized so that every value may occupy C32_MAX_BYTES.
    const size_t buf_size = num_ints * C32_MAX_BYTES;
    char *const  buffer   = (char*)CALLOCATE(buf_size, sizeof(char));
    char *const  buf_end  = buffer + buf_size;
    char        *cursor   = buffer;
    uint64_t    *ints     = NULL;

    for (uint32_t range = 0; range < num_ranges; range++) {
        char *skipper;

        ints = TestUtils_random_u64s(ints, num_ints, range_lo[range],
                                     range_hi[range]);

        // Variable-width encoding.
        cursor = buffer;
        for (size_t idx = 0; idx < num_ints; idx++) {
            NumUtil_encode_c32((uint32_t)ints[idx], &cursor);
        }

        // Decode and skip in lockstep.
        cursor  = buffer;
        skipper = buffer;
        for (size_t idx = 0; idx < num_ints; idx++) {
            TEST_INT_EQ(batch, NumUtil_decode_c32(&cursor), (long)ints[idx],
                        "c32 %lu", (long)ints[idx]);
            NumUtil_skip_cint(&skipper);
            if (cursor > buf_end) {
                THROW(ERR, "overrun");
            }
        }
        TEST_TRUE(batch, skipper == cursor, "skip %lu == %lu",
                  (unsigned long)skipper, (unsigned long)cursor);

        // Padded encoding: the batch must fill the buffer exactly.
        cursor = buffer;
        for (size_t idx = 0; idx < num_ints; idx++) {
            NumUtil_encode_padded_c32((uint32_t)ints[idx], &cursor);
        }
        TEST_TRUE(batch, cursor == buf_end,
                  "padded c32 uses 5 bytes (%lu == %lu)",
                  (unsigned long)cursor, (unsigned long)buf_end);

        // Padded values must decode and skip like ordinary ones.
        cursor  = buffer;
        skipper = buffer;
        for (size_t idx = 0; idx < num_ints; idx++) {
            TEST_INT_EQ(batch, NumUtil_decode_c32(&cursor), (long)ints[idx],
                        "padded c32 %lu", (long)ints[idx]);
            NumUtil_skip_cint(&skipper);
            if (cursor > buf_end) {
                THROW(ERR, "overrun");
            }
        }
        TEST_TRUE(batch, skipper == cursor, "skip padded %lu == %lu",
                  (unsigned long)skipper, (unsigned long)cursor);
    }

    // Largest representable value.
    cursor = buffer;
    NumUtil_encode_c32(U32_MAX, &cursor);
    cursor = buffer;
    TEST_INT_EQ(batch, NumUtil_decode_c32(&cursor), U32_MAX, "c32 U32_MAX");

    FREEMEM(buffer);
    FREEMEM(ints);
}
// Build a TermVector for `field` / `term_text` from serialized position data.
//
// `tv_buf` holds a C32 count followed by that many (position, start offset,
// end offset) triples, each member stored as a C32.  An empty buffer yields
// a TermVector with zero positions.
//
// Throws ERR "Bad encoding of posdata" when the count cannot possibly fit in
// the bytes that follow it, or when decoding does not consume the buffer
// exactly.  Returns a new TermVector otherwise.
static TermVector*
S_extract_tv_from_tv_buf(String *field, String *term_text, ByteBuf *tv_buf) {
    TermVector *retval      = NULL;
    const char *posdata     = BB_Get_Buf(tv_buf);
    const char *posdata_end = posdata + BB_Get_Size(tv_buf);
    int32_t    *positions   = NULL;
    int32_t    *starts      = NULL;
    int32_t    *ends        = NULL;
    uint32_t    num_pos     = 0;

    if (posdata != posdata_end) {
        num_pos = NumUtil_decode_c32(&posdata);

        // Each entry needs three C32s of at least one byte apiece.  Reject
        // an implausible count before it sizes three allocations and drives
        // the decode loop past the end of the buffer.  This is only a cheap
        // plausibility check; the exact-consumption test below remains the
        // authoritative validation.
        if (posdata > posdata_end
            || num_pos > (size_t)(posdata_end - posdata) / 3
           ) {
            THROW(ERR, "Bad encoding of posdata");
        }

        positions = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
        starts    = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
        ends      = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
    }

    // Expand C32s.
    for (uint32_t i = 0; i < num_pos; i++) {
        positions[i] = NumUtil_decode_c32(&posdata);
        starts[i]    = NumUtil_decode_c32(&posdata);
        ends[i]      = NumUtil_decode_c32(&posdata);
    }

    // Hand the raw arrays to their I32Array owners before validating, so
    // the error path can release them through DECREF instead of leaking
    // them when THROW leaves this function.
    I32Array *posits_map = I32Arr_new_steal(positions, num_pos);
    I32Array *starts_map = I32Arr_new_steal(starts, num_pos);
    I32Array *ends_map   = I32Arr_new_steal(ends, num_pos);

    if (posdata != posdata_end) {
        DECREF(posits_map);
        DECREF(starts_map);
        DECREF(ends_map);
        THROW(ERR, "Bad encoding of posdata");
    }
    else {
        retval = TV_new(field, term_text, posits_map, starts_map, ends_map);
        DECREF(posits_map);
        DECREF(starts_map);
        DECREF(ends_map);
    }

    return retval;
}
// Read one posting record from `instream` into `self`.
//
// Record layout as consumed here: a C32 doc code, an optional C32 frequency,
// one norm byte, then `freq` C32 position deltas.  Updates doc_id, freq,
// weight and the prox array (grown on demand) in the posting's ivars.
void
ScorePost_Read_Record_IMP(ScorePosting *self, InStream *instream) {
    ScorePostingIVARS *const ivars = ScorePost_IVARS(self);

    // The header is at most two C32s plus the single norm byte.
    const size_t header_max = (C32_MAX_BYTES * 2) + 1;
    const char  *cursor     = InStream_Buf(instream, header_max);

    // The doc code carries the doc id delta in its upper bits.  A set low
    // bit means the frequency is 1; otherwise the frequency follows as its
    // own C32.
    const uint32_t doc_code = NumUtil_decode_c32(&cursor);
    ivars->doc_id += doc_code >> 1;
    ivars->freq = (doc_code & 1) ? 1 : NumUtil_decode_c32(&cursor);

    // One byte indexes the norm decoder table to give the weight.
    const uint8_t norm_byte = *(const uint8_t*)cursor;
    cursor++;
    ivars->weight = ivars->norm_decoder[norm_byte];

    // Make sure the positions array can hold one entry per occurrence.
    const uint32_t num_prox = ivars->freq;
    if (num_prox > ivars->prox_cap) {
        ivars->prox = (uint32_t*)REALLOCATE(ivars->prox,
                                            num_prox * sizeof(uint32_t));
        ivars->prox_cap = num_prox;
    }
    uint32_t *const prox = ivars->prox;

    // Commit the header bytes, then request room for the position deltas.
    InStream_Advance_Buf(instream, cursor);
    cursor = InStream_Buf(instream, num_prox * C32_MAX_BYTES);

    // Positions are stored as deltas; accumulate them into absolute values.
    uint32_t running = 0;
    for (uint32_t i = 0; i < num_prox; i++) {
        running += NumUtil_decode_c32(&cursor);
        prox[i] = running;
    }

    InStream_Advance_Buf(instream, cursor);
}
// Split a field's serialized term-vector data into a Hash that maps each
// term's text to a ByteBuf holding that term's still-compressed
// positions/offsets data.
//
// Layout as consumed here: a C32 term count, then per term a C32 `overlap`,
// a C32 `len`, `len` bytes of term text, a C32 position count, and three
// compressed integers per position.
static Hash*
S_extract_tv_cache(ByteBuf *field_buf) {
    Hash       *cache     = Hash_new(0);
    const char *cursor    = BB_Get_Buf(field_buf);
    int32_t     num_terms = NumUtil_decode_c32(&cursor);
    CharBuf    *term_cb   = CB_new(0);

    for (int32_t term_tick = 0; term_tick < num_terms; term_tick++) {
        // Rebuild the term text: size the buffer to `overlap` (presumably
        // the prefix shared with the previous term) and append the suffix.
        size_t overlap    = NumUtil_decode_c32(&cursor);
        size_t suffix_len = NumUtil_decode_c32(&cursor);
        CB_Set_Size(term_cb, overlap);
        CB_Cat_Trusted_Utf8(term_cb, cursor, suffix_len);
        cursor += suffix_len;

        // Measure the posdata span without expanding it; the numbers stay
        // compressed to save a little memory.
        const char *posdata_start = cursor;
        int32_t     num_positions = NumUtil_decode_c32(&cursor);
        for (; num_positions != 0; num_positions--) {
            NumUtil_skip_cint(&cursor);
            NumUtil_skip_cint(&cursor);
            NumUtil_skip_cint(&cursor);
        }
        size_t posdata_len = cursor - posdata_start;

        // Store the text => posdata pair in the output hash.
        // NOTE(review): the new ByteBuf is not DECREF'd here, so Hash_Store
        // presumably takes ownership of the value -- confirm.
        String  *term_text = CB_To_String(term_cb);
        ByteBuf *posdata   = BB_new_bytes(posdata_start, posdata_len);
        Hash_Store(cache, (Obj*)term_text, (Obj*)posdata);
        DECREF(term_text);
    }

    DECREF(term_cb);
    return cache;
}