/* Serialize `blob` onto `outstream`: a length prefix written with
 * OutStream_Write_CU64, followed by the blob's raw bytes.
 *
 * Throws ERR if the blob's size exceeds INT32_MAX; nothing is written in
 * that case.
 */
void
Freezer_serialize_blob(Blob *blob, OutStream *outstream) {
    const size_t num_bytes = Blob_Get_Size(blob);

    // Reject oversized payloads before anything reaches the stream.
    if (num_bytes > INT32_MAX) {
        THROW(ERR, "Can't serialize blob above 2GB: %u64",
              (uint64_t)num_bytes);
    }

    // Length first, then the content itself.
    OutStream_Write_CU64(outstream, num_bytes);
    const char *content = Blob_Get_Buf(blob);
    OutStream_Write_Bytes(outstream, content, num_bytes);
}
/* Build a TermVector for `field`/`term_text` from the compressed position
 * data held in `tv_buf`.
 *
 * Layout read from the buffer: a compressed unsigned count, followed by
 * `count` triples of compressed signed ints (position, start offset, end
 * offset).  An empty buffer yields a TermVector with zero positions.
 *
 * Throws ERR ("Bad encoding of posdata") when the count cannot fit in the
 * remaining bytes, when decoding would run past the end of the buffer, or
 * when bytes are left over after the last triple.  The scratch arrays are
 * released on every path, including the error path.
 */
static TermVector*
S_extract_tv_from_tv_buf(String *field, String *term_text, Blob *tv_buf) {
    TermVector *retval      = NULL;
    const char *posdata     = Blob_Get_Buf(tv_buf);
    const char *posdata_end = posdata + Blob_Get_Size(tv_buf);
    int32_t    *positions   = NULL;
    int32_t    *starts      = NULL;
    int32_t    *ends        = NULL;
    uint32_t    num_pos     = 0;
    int         well_formed = 1;

    if (posdata != posdata_end) {
        num_pos = NumUtil_decode_cu32(&posdata);

        // Every triple occupies at least three bytes (one per compressed
        // int), so a count larger than remaining/3 can only come from
        // corrupt data.  Checking here also keeps the allocation size
        // bounded by the buffer size.
        if (posdata > posdata_end
            || num_pos > (size_t)(posdata_end - posdata) / 3
           ) {
            well_formed = 0;
            num_pos     = 0;
        }
        else {
            positions = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
            starts    = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
            ends      = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
        }
    }

    // Expand CI32s.
    for (uint32_t i = 0; i < num_pos; i++) {
        // Valid data always has bytes left at the start of a triple; stop
        // rather than keep decoding beyond the end of the buffer.
        if (posdata >= posdata_end) {
            well_formed = 0;
            break;
        }
        positions[i] = NumUtil_decode_ci32(&posdata);
        starts[i]    = NumUtil_decode_ci32(&posdata);
        ends[i]      = NumUtil_decode_ci32(&posdata);
    }

    if (posdata != posdata_end) {
        well_formed = 0;
    }

    // Hand the raw buffers to their owning arrays before deciding whether
    // to fail, so that the DECREFs below free them on the error path too
    // (THROW does not return, which previously leaked all three buffers).
    I32Array *posits_map = I32Arr_new_steal(positions, num_pos);
    I32Array *starts_map = I32Arr_new_steal(starts, num_pos);
    I32Array *ends_map   = I32Arr_new_steal(ends, num_pos);

    if (well_formed) {
        retval = TV_new(field, term_text, posits_map, starts_map, ends_map);
    }

    DECREF(posits_map);
    DECREF(starts_map);
    DECREF(ends_map);

    if (!well_formed) {
        THROW(ERR, "Bad encoding of posdata");
    }

    return retval;
}
/* Split the term-vector data for one field into a Hash that maps each term's
 * text to a Blob holding that term's still-compressed position data.
 *
 * Layout read from `field_buf`: a compressed signed term count, then for
 * each term
 *   - a compressed unsigned `overlap`: how many leading bytes of the
 *     previous term's text to keep,
 *   - a compressed unsigned `len` followed by `len` bytes of new text,
 *   - a compressed signed position count followed by three compressed ints
 *     per position.
 *
 * Returns a new Hash.  The bytes of `field_buf` are not bounds-checked here;
 * the caller is presumed to pass a well-formed buffer.
 */
static Hash*
S_extract_tv_cache(Blob *field_buf) {
    Hash       *tv_cache  = Hash_new(0);
    const char *tv_string = Blob_Get_Buf(field_buf);
    // Read the number of highlightable terms in the field.
    int32_t     num_terms = NumUtil_decode_ci32(&tv_string);
    ByteBuf    *text_buf  = BB_new(0);

    for (int32_t i = 0; i < num_terms; i++) {
        size_t overlap  = NumUtil_decode_cu32(&tv_string);
        size_t text_len = NumUtil_decode_cu32(&tv_string);

        // Decompress the term text: keep the shared prefix already in
        // text_buf, then append the new suffix.
        BB_Set_Size(text_buf, overlap);
        BB_Cat_Bytes(text_buf, tv_string, text_len);
        tv_string += text_len;

        // Get positions & offsets string.
        const char *bookmark_ptr  = tv_string;
        int32_t     num_positions = NumUtil_decode_ci32(&tv_string);

        // Counted loop instead of `while (num_positions--)`: a negative
        // count (corrupt data) must skip nothing rather than count down
        // through the whole negative range.
        for (int32_t j = 0; j < num_positions; j++) {
            // Leave nums compressed to save a little mem.
            NumUtil_skip_cint(&tv_string);
            NumUtil_skip_cint(&tv_string);
            NumUtil_skip_cint(&tv_string);
        }
        size_t posdata_len = (size_t)(tv_string - bookmark_ptr);

        // Store the $text => $posdata pair in the output hash.
        String *text = BB_Trusted_Utf8_To_String(text_buf);
        Hash_Store(tv_cache, text, (Obj*)Blob_new(bookmark_ptr, posdata_len));
        DECREF(text);
    }
    DECREF(text_buf);

    return tv_cache;
}
/* Serialize `blob` onto `outstream`: a length prefix written with
 * OutStream_Write_C32, followed by the blob's raw bytes.
 *
 * Throws ERR if the blob's size exceeds INT32_MAX.  Without this guard the
 * size_t length was implicitly narrowed for the prefix while the full
 * `size` bytes were still written, so the prefix and payload could disagree.
 */
void
Freezer_serialize_blob(Blob *blob, OutStream *outstream) {
    size_t size = Blob_Get_Size(blob);

    if (size > INT32_MAX) {
        THROW(ERR, "Can't serialize blob above 2GB: %u64", (uint64_t)size);
    }

    // The cast is value-preserving: size is known to be <= INT32_MAX here.
    OutStream_Write_C32(outstream, (uint32_t)size);
    OutStream_Write_Bytes(outstream, Blob_Get_Buf(blob), size);
}
/* Append the stored fields of one inverted document to the data stream and
 * record where that document's record begins in the index stream.
 *
 * Record layout written to the data stream: the number of stored fields,
 * then for each stored field its serialized name followed by a value whose
 * encoding depends on the field's primitive type (length-prefixed bytes for
 * text and blobs; CI32/CI64/F32/F64 for numeric types).
 *
 * Throws ERR when `doc_id` is not the next id implied by the index stream's
 * position, when a text or blob value is larger than INT32_MAX bytes, or
 * when a field's primitive type is not recognized.
 */
void
DocWriter_Add_Inverted_Doc_IMP(DocWriter *self, Inverter *inverter,
                               int32_t doc_id) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    // S_lazy_init runs before ivars->ix_out is read, as in the original
    // ordering.
    OutStream *dat_out = S_lazy_init(self);
    OutStream *ix_out  = ivars->ix_out;
    int64_t record_start = OutStream_Tell(dat_out);
    // One I64 file pointer is appended to ix_out per document (see the end
    // of this function), so position / 8 gives the id expected next.
    int64_t next_doc_id = OutStream_Tell(ix_out) / 8;

    // Verify doc id.
    if (doc_id != next_doc_id) {
        THROW(ERR, "Expected doc id %i64 but got %i32", next_doc_id, doc_id);
    }

    // First pass: count the stored fields so the count can lead the record.
    uint32_t stored_count = 0;
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *counted_type = Inverter_Get_Type(inverter);
        if (FType_Stored(counted_type)) {
            stored_count++;
        }
    }
    OutStream_Write_CU32(dat_out, stored_count);

    // Second pass: emit name and value for each stored field.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);

        // Only store fields marked as "stored".
        if (!FType_Stored(type)) {
            continue;
        }

        String *field = Inverter_Get_Field_Name(inverter);
        Obj    *value = Inverter_Get_Value(inverter);
        Freezer_serialize_string(field, dat_out);

        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                String     *text      = (String*)value;
                const char *bytes     = Str_Get_Ptr8(text);
                size_t      num_bytes = Str_Get_Size(text);
                if (num_bytes > INT32_MAX) {
                    THROW(ERR, "Field %o over 2GB: %u64", field,
                          (uint64_t)num_bytes);
                }
                OutStream_Write_CU32(dat_out, (uint32_t)num_bytes);
                OutStream_Write_Bytes(dat_out, bytes, num_bytes);
                break;
            }
            case FType_BLOB: {
                Blob       *blob      = (Blob*)value;
                const char *bytes     = Blob_Get_Buf(blob);
                size_t      num_bytes = Blob_Get_Size(blob);
                if (num_bytes > INT32_MAX) {
                    THROW(ERR, "Field %o over 2GB: %u64", field,
                          (uint64_t)num_bytes);
                }
                OutStream_Write_CU32(dat_out, (uint32_t)num_bytes);
                OutStream_Write_Bytes(dat_out, bytes, num_bytes);
                break;
            }
            case FType_INT32: {
                int32_t narrow = (int32_t)Int_Get_Value((Integer*)value);
                OutStream_Write_CI32(dat_out, narrow);
                break;
            }
            case FType_INT64: {
                int64_t wide = Int_Get_Value((Integer*)value);
                OutStream_Write_CI64(dat_out, wide);
                break;
            }
            case FType_FLOAT32: {
                float single = (float)Float_Get_Value((Float*)value);
                OutStream_Write_F32(dat_out, single);
                break;
            }
            case FType_FLOAT64: {
                double dbl = Float_Get_Value((Float*)value);
                OutStream_Write_F64(dat_out, dbl);
                break;
            }
            default:
                THROW(ERR, "Unrecognized type: %o", type);
        }
    }

    // Write file pointer.
    OutStream_Write_I64(ix_out, record_start);
}
/* Write one value of primitive type `prim_id` (masked with
 * FType_PRIMITIVE_ID_MASK).
 *
 * Text and blob values: the current offset in `dat_out` relative to
 * `dat_start` is written to `ix_out` as an I64, then the value's bytes are
 * appended to `dat_out`.  A NULL `val` writes the offset only and appends no
 * bytes.
 *
 * Numeric values: written to `dat_out` at fixed width (I32, I64, F32, F64);
 * a NULL `val` is written as zero.  `ix_out` is not touched.
 *
 * Throws ERR for an unrecognized primitive id.
 */
static void
S_write_val(Obj *val, int8_t prim_id, OutStream *ix_out, OutStream *dat_out,
            int64_t dat_start) {
    switch (prim_id & FType_PRIMITIVE_ID_MASK) {
        case FType_TEXT: {
            int64_t offset = OutStream_Tell(dat_out) - dat_start;
            OutStream_Write_I64(ix_out, offset);
            if (val) {
                String *text = (String*)val;
                OutStream_Write_Bytes(dat_out, Str_Get_Ptr8(text),
                                      Str_Get_Size(text));
            }
            break;
        }
        case FType_BLOB: {
            int64_t offset = OutStream_Tell(dat_out) - dat_start;
            OutStream_Write_I64(ix_out, offset);
            if (val) {
                Blob *bytes = (Blob*)val;
                OutStream_Write_Bytes(dat_out, Blob_Get_Buf(bytes),
                                      Blob_Get_Size(bytes));
            }
            break;
        }
        case FType_INT32: {
            int32_t narrow = 0;
            if (val) {
                narrow = (int32_t)Int_Get_Value((Integer*)val);
            }
            OutStream_Write_I32(dat_out, narrow);
            break;
        }
        case FType_INT64: {
            int64_t wide = 0;
            if (val) {
                wide = Int_Get_Value((Integer*)val);
            }
            OutStream_Write_I64(dat_out, wide);
            break;
        }
        case FType_FLOAT32: {
            float single = 0.0f;
            if (val) {
                single = (float)Float_Get_Value((Float*)val);
            }
            OutStream_Write_F32(dat_out, single);
            break;
        }
        case FType_FLOAT64: {
            double dbl = 0.0;
            if (val) {
                dbl = Float_Get_Value((Float*)val);
            }
            OutStream_Write_F64(dat_out, dbl);
            break;
        }
        default:
            THROW(ERR, "Unrecognized primitive id: %i32", (int32_t)prim_id);
    }
}