// Populate `self` from serialized data in `instream`.
//
// The stream is consumed in this order: field name string, term text string,
// a C64 position count, then one (position, start offset, end offset) triple
// of C32 values per position.  The decoded pieces are passed to TV_init().
//
// Returns `self`.
TermVector*
TV_Deserialize_IMP(TermVector *self, InStream *instream) {
    String *field   = Freezer_read_string(instream);
    String *text    = Freezer_read_string(instream);
    size_t  num_pos = InStream_Read_C64(instream);

    // NOTE(review): num_pos is taken straight from the stream and this
    // multiplication is not checked for overflow.
    const size_t array_bytes = num_pos * sizeof(int32_t);

    // One raw buffer each for positions, start offsets and end offsets.
    int32_t *posits = (int32_t*)MALLOCATE(array_bytes);
    int32_t *starts = (int32_t*)MALLOCATE(array_bytes);
    int32_t *ends   = (int32_t*)MALLOCATE(array_bytes);

    // The three values for each position are interleaved in the stream.
    size_t tick = 0;
    while (tick < num_pos) {
        posits[tick] = InStream_Read_C32(instream);
        starts[tick] = InStream_Read_C32(instream);
        ends[tick]   = InStream_Read_C32(instream);
        tick++;
    }

    // Wrap the raw buffers; they are not freed in this function.
    I32Array *positions     = I32Arr_new_steal(posits, num_pos);
    I32Array *start_offsets = I32Arr_new_steal(starts, num_pos);
    I32Array *end_offsets   = I32Arr_new_steal(ends, num_pos);

    TV_init(self, field, text, positions, start_offsets, end_offsets);

    // Drop this function's references to the temporaries.
    DECREF(positions);
    DECREF(start_offsets);
    DECREF(end_offsets);
    DECREF(text);
    DECREF(field);

    return self;
}
// Load the index entry selected by `ivars->tick` into the LexIndex's
// term stepper and TermInfo.
//
// The entry's file position is decoded from the big-endian u64 found at
// `ivars->offsets + ivars->tick`.  After seeking there, the stream yields a
// key frame for the term stepper, then doc freq (C32), postings file
// position (C64), an optional skip file position (C64), and the lexicon file
// position (C64).
static void
S_read_entry(LexIndex *self) {
    LexIndexIVARS *const ivars        = LexIndex_IVARS(self);
    InStream      *const index_stream = ivars->ix_in;
    TermInfo      *const info         = ivars->tinfo;

    const int64_t entry_pos
        = (int64_t)NumUtil_decode_bigend_u64(ivars->offsets + ivars->tick);
    InStream_Seek(index_stream, entry_pos);
    TermStepper_Read_Key_Frame(ivars->term_stepper, index_stream);

    const int32_t doc_freq = InStream_Read_C32(index_stream);
    TInfo_Set_Doc_Freq(info, doc_freq);
    TInfo_Set_Post_FilePos(info, InStream_Read_C64(index_stream));

    // The skip pointer is only present in the stream once doc_freq has
    // reached skip_interval; otherwise nothing is read and it stays zero.
    int64_t skip_filepos = 0;
    if (doc_freq >= ivars->skip_interval) {
        skip_filepos = InStream_Read_C64(index_stream);
    }
    TInfo_Set_Skip_FilePos(info, skip_filepos);

    TInfo_Set_Lex_FilePos(info, InStream_Read_C64(index_stream));
}
// Update the TermInfo stored in `self->value` from the next record in
// `instream`.
//
// Reads a C32 doc freq, a C64 amount that is added to the existing postings
// file position, and -- only when the doc freq has reached
// `self->skip_interval` -- a C64 skip file position.
void
MatchTInfoStepper_read_delta(MatchTermInfoStepper *self, InStream *instream) {
    TermInfo *const info = (TermInfo*)self->value;

    // Doc freq replaces the previous value outright.
    info->doc_freq = InStream_Read_C32(instream);

    // The postings file pointer advances by the amount read.
    info->post_filepos += InStream_Read_C64(instream);

    // No skip pointer is consumed from the stream below the threshold; the
    // field is zeroed instead.
    info->skip_filepos = info->doc_freq >= self->skip_interval
                         ? InStream_Read_C64(instream)
                         : 0;
}
// Update the TermInfo held in this stepper's `value` ivar from the next
// record in `instream`.
//
// Reads a C32 doc freq, a C64 amount that is added to the existing postings
// file position, and -- only when the doc freq has reached the stepper's
// skip_interval -- a C64 skip file position.
void
MatchTInfoStepper_Read_Delta_IMP(MatchTermInfoStepper *self,
                                 InStream *instream) {
    MatchTermInfoStepperIVARS *const stepper
        = MatchTInfoStepper_IVARS(self);
    TermInfoIVARS *const info = TInfo_IVARS((TermInfo*)stepper->value);

    // Doc freq replaces the previous value outright.
    info->doc_freq = InStream_Read_C32(instream);

    // The postings file pointer advances by the amount read.
    info->post_filepos += InStream_Read_C64(instream);

    // No skip pointer is consumed from the stream below the threshold; the
    // field is zeroed instead.
    info->skip_filepos = info->doc_freq >= stepper->skip_interval
                         ? InStream_Read_C64(instream)
                         : 0;
}
// Fetch the stored fields of document `doc_id` and return them wrapped in a
// newly created HitDoc with a score of 0.0.
//
// The U64 read at offset `doc_id * 8` in the index stream gives the
// document's position in the data stream.  The record found there is a C32
// field count followed, for each field, by a C32 name length, the name
// bytes, and a value whose encoding is selected by the field's primitive
// type as reported by the schema.
//
// Throws ERR if a field's primitive type is not one of those handled below.
HitDoc*
DefDocReader_Fetch_Doc_IMP(DefaultDocReader *self, int32_t doc_id) {
    DefaultDocReaderIVARS *const ivars = DefDocReader_IVARS(self);
    Schema   *const schema = ivars->schema;
    InStream *const dat_in = ivars->dat_in;
    InStream *const ix_in  = ivars->ix_in;
    Hash     *const fields = Hash_new(1);
    int64_t   start;
    uint32_t  num_fields;
    uint32_t  field_name_cap = 31;
    // Scratch buffer for field names, grown on demand and reused across
    // fields.
    char     *field_name = (char*)MALLOCATE((size_t)field_name_cap + 1);

    // Get data file pointer from index, read number of fields.
    InStream_Seek(ix_in, (int64_t)doc_id * 8);
    start = InStream_Read_U64(ix_in);
    InStream_Seek(dat_in, start);
    num_fields = InStream_Read_C32(dat_in);

    // Decode stored data and build up the doc field by field.
    while (num_fields--) {
        uint32_t   field_name_len;
        Obj       *value;
        FieldType *type;

        // Read field name.
        field_name_len = InStream_Read_C32(dat_in);
        if (field_name_len > field_name_cap) {
            field_name_cap = field_name_len;
            // Widen to size_t before adding 1: in uint32_t arithmetic a
            // length of UINT32_MAX would wrap the requested size to 0 while
            // the read below still copies field_name_len bytes.  (This only
            // helps where size_t is wider than 32 bits.)
            field_name = (char*)REALLOCATE(field_name,
                                           (size_t)field_name_cap + 1);
        }
        InStream_Read_Bytes(dat_in, field_name, field_name_len);

        // Find the Field's FieldType.
        StackString *field_name_str
            = SSTR_WRAP_UTF8(field_name, field_name_len);
        type = Schema_Fetch_Type(schema, (String*)field_name_str);

        // Read the field value.
        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                uint32_t value_len = InStream_Read_C32(dat_in);
                // Same widening as above: without it, UINT32_MAX + 1 wraps
                // to 0 and both the read and the terminator write land
                // outside the allocation.
                char *buf = (char*)MALLOCATE((size_t)value_len + 1);
                InStream_Read_Bytes(dat_in, buf, value_len);
                buf[value_len] = '\0';
                value = (Obj*)Str_new_steal_utf8(buf, value_len);
                break;
            }
            case FType_BLOB: {
                uint32_t value_len = InStream_Read_C32(dat_in);
                char *buf = (char*)MALLOCATE(value_len);
                InStream_Read_Bytes(dat_in, buf, value_len);
                value = (Obj*)BB_new_steal_bytes(buf, value_len, value_len);
                break;
            }
            case FType_FLOAT32:
                value = (Obj*)Float32_new(InStream_Read_F32(dat_in));
                break;
            case FType_FLOAT64:
                value = (Obj*)Float64_new(InStream_Read_F64(dat_in));
                break;
            case FType_INT32:
                value = (Obj*)Int32_new((int32_t)InStream_Read_C32(dat_in));
                break;
            case FType_INT64:
                value = (Obj*)Int64_new((int64_t)InStream_Read_C64(dat_in));
                break;
            default:
                // `value` is assigned so it is never read uninitialized as
                // far as the compiler can tell.
                value = NULL;
                THROW(ERR, "Unrecognized type: %o", type);
        }

        // Store the value.
        Hash_Store_Utf8(fields, field_name, field_name_len, value);
    }
    FREEMEM(field_name);

    // The local reference to `fields` is released once the HitDoc has been
    // constructed from it.
    HitDoc *retval = HitDoc_new(fields, doc_id, 0.0);
    DECREF(fields);

    return retval;
}
// Fill in `obj` from serialized data in `instream`, choosing the routine
// according to the first class test (in the order below) that `obj`
// satisfies.  Returns the object produced by that routine, which for the
// Boolean case is one of the CFISH_TRUE / CFISH_FALSE objects rather than
// `obj` itself.
//
// Throws ERR when `obj` matches none of the known classes.
Obj*
Freezer_deserialize(Obj *obj, InStream *instream) {
    if (Obj_is_a(obj, STRING)) {
        return (Obj*)Freezer_deserialize_string((String*)obj, instream);
    }
    if (Obj_is_a(obj, BLOB)) {
        return (Obj*)Freezer_deserialize_blob((Blob*)obj, instream);
    }
    if (Obj_is_a(obj, VECTOR)) {
        return (Obj*)Freezer_deserialize_varray((Vector*)obj, instream);
    }
    if (Obj_is_a(obj, HASH)) {
        return (Obj*)Freezer_deserialize_hash((Hash*)obj, instream);
    }
    if (Obj_is_a(obj, INTEGER)) {
        int64_t number = (int64_t)InStream_Read_C64(instream);
        return (Obj*)Int_init((Integer*)obj, number);
    }
    if (Obj_is_a(obj, FLOAT)) {
        double number = InStream_Read_F64(instream);
        return (Obj*)Float_init((Float*)obj, number);
    }
    if (Obj_is_a(obj, BOOLEAN)) {
        // Any non-zero byte counts as true.
        const bool is_true = InStream_Read_U8(instream) != 0;
        Obj *singleton;
        if (is_true) {
            singleton = INCREF(CFISH_TRUE);
        }
        else {
            singleton = INCREF(CFISH_FALSE);
        }
        // FIXME: This DECREF is essentially a no-op causing a
        // memory leak.
        DECREF(obj);
        return singleton;
    }
    if (Obj_is_a(obj, QUERY)) {
        return (Obj*)Query_Deserialize((Query*)obj, instream);
    }
    if (Obj_is_a(obj, DOC)) {
        return (Obj*)Doc_Deserialize((Doc*)obj, instream);
    }
    if (Obj_is_a(obj, DOCVECTOR)) {
        return (Obj*)DocVec_Deserialize((DocVector*)obj, instream);
    }
    if (Obj_is_a(obj, TERMVECTOR)) {
        return (Obj*)TV_Deserialize((TermVector*)obj, instream);
    }
    if (Obj_is_a(obj, SIMILARITY)) {
        return (Obj*)Sim_Deserialize((Similarity*)obj, instream);
    }
    if (Obj_is_a(obj, MATCHDOC)) {
        return (Obj*)MatchDoc_Deserialize((MatchDoc*)obj, instream);
    }
    if (Obj_is_a(obj, TOPDOCS)) {
        return (Obj*)TopDocs_Deserialize((TopDocs*)obj, instream);
    }
    if (Obj_is_a(obj, SORTSPEC)) {
        return (Obj*)SortSpec_Deserialize((SortSpec*)obj, instream);
    }
    if (Obj_is_a(obj, SORTRULE)) {
        return (Obj*)SortRule_Deserialize((SortRule*)obj, instream);
    }

    THROW(ERR, "Don't know how to deserialize a %o",
          Obj_get_class_name(obj));

    return obj;
}