// Build a Matcher over the deletions bit vector belonging to the segment
// that `seg_reader` reads.
//
// The segment's name is looked up in `name_to_tick` to find its slot index.
// Returns NULL when the slot is neither flagged in `updated` nor reported
// by its DeletionsReader as having a nonzero deletion count.  Throws ERR if
// the segment name has no entry in `name_to_tick`.
Matcher*
DefDelWriter_Seg_Deletions_IMP(DefaultDeletionsWriter *self,
                               SegReader *seg_reader) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Matcher *result = NULL;
    String  *name   = Seg_Get_Name(SegReader_Get_Segment(seg_reader));
    Integer *slot   = (Integer*)Hash_Fetch(ivars->name_to_tick, name);

    if (slot == NULL) {
        // Sanity check.
        THROW(ERR, "Couldn't find SegReader %o", seg_reader);
    }
    else {
        size_t     idx   = (size_t)Int_Get_Value(slot);
        SegReader *known = (SegReader*)Vec_Fetch(ivars->seg_readers, idx);
        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Obtain(
                  known, Class_Get_Name(DELETIONSREADER));

        // Only hand back a matcher when there is something to match:
        // either this slot was touched, or the reader already had
        // deletions.
        if (ivars->updated[idx] || DelReader_Del_Count(del_reader)) {
            BitVector *bits = (BitVector*)Vec_Fetch(ivars->bit_vecs, idx);
            result = (Matcher*)BitVecMatcher_new(bits);
        }
    }

    return result;
}
// Write `obj` to `outstream`, dispatching on its runtime class.
//
// Classes are tested in a fixed order and the first match wins.  String,
// Blob, Vector and Hash go through the Freezer_serialize_* helpers;
// Integer, Float and Boolean are written directly as a primitive value;
// the remaining known classes delegate to their own Serialize method.
// Throws ERR for any object matching none of the tested classes.
void
Freezer_serialize(Obj *obj, OutStream *outstream) {
    // Container-like and string types handled by Freezer helpers.
    if (Obj_is_a(obj, STRING)) {
        Freezer_serialize_string((String*)obj, outstream);
        return;
    }
    if (Obj_is_a(obj, BLOB)) {
        Freezer_serialize_blob((Blob*)obj, outstream);
        return;
    }
    if (Obj_is_a(obj, VECTOR)) {
        Freezer_serialize_varray((Vector*)obj, outstream);
        return;
    }
    if (Obj_is_a(obj, HASH)) {
        Freezer_serialize_hash((Hash*)obj, outstream);
        return;
    }

    // Boxed primitives are written inline.
    if (Obj_is_a(obj, INTEGER)) {
        int64_t number = Int_Get_Value((Integer*)obj);
        OutStream_Write_C64(outstream, (uint64_t)number);
        return;
    }
    if (Obj_is_a(obj, FLOAT)) {
        double number = Float_Get_Value((Float*)obj);
        OutStream_Write_F64(outstream, number);
        return;
    }
    if (Obj_is_a(obj, BOOLEAN)) {
        bool flag = Bool_Get_Value((Boolean*)obj);
        OutStream_Write_U8(outstream, (uint8_t)flag);
        return;
    }

    // Classes that know how to serialize themselves.
    if (Obj_is_a(obj, QUERY)) {
        Query_Serialize((Query*)obj, outstream);
        return;
    }
    if (Obj_is_a(obj, DOC)) {
        Doc_Serialize((Doc*)obj, outstream);
        return;
    }
    if (Obj_is_a(obj, DOCVECTOR)) {
        DocVec_Serialize((DocVector*)obj, outstream);
        return;
    }
    if (Obj_is_a(obj, TERMVECTOR)) {
        TV_Serialize((TermVector*)obj, outstream);
        return;
    }
    if (Obj_is_a(obj, SIMILARITY)) {
        Sim_Serialize((Similarity*)obj, outstream);
        return;
    }
    if (Obj_is_a(obj, MATCHDOC)) {
        MatchDoc_Serialize((MatchDoc*)obj, outstream);
        return;
    }
    if (Obj_is_a(obj, TOPDOCS)) {
        TopDocs_Serialize((TopDocs*)obj, outstream);
        return;
    }
    if (Obj_is_a(obj, SORTSPEC)) {
        SortSpec_Serialize((SortSpec*)obj, outstream);
        return;
    }
    if (Obj_is_a(obj, SORTRULE)) {
        SortRule_Serialize((SortRule*)obj, outstream);
        return;
    }

    THROW(ERR, "Don't know how to serialize a %o",
          Obj_get_class_name(obj));
}
// Report the number of set bits in the deletions bit vector registered for
// the segment called `seg_name`.
//
// Returns 0 when the name has no entry in `name_to_tick`, or when no bit
// vector is found at the corresponding index of `bit_vecs`.
int32_t
DefDelWriter_Seg_Del_Count_IMP(DefaultDeletionsWriter *self,
                               String *seg_name) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Integer   *slot = (Integer*)Hash_Fetch(ivars->name_to_tick, seg_name);
    BitVector *bits = NULL;

    if (slot != NULL) {
        bits = (BitVector*)Vec_Fetch(ivars->bit_vecs,
                                     (size_t)Int_Get_Value(slot));
    }
    if (bits == NULL) {
        return 0;
    }
    return (int32_t)BitVec_Count(bits);
}
// Look up the number stored for `field` in the segment's `by_name` table.
//
// Returns 0 when `field` is NULL or when the table has no entry for it.
int32_t
Seg_Field_Num_IMP(Segment *self, String *field) {
    if (field != NULL) {
        SegmentIVARS *const ivars = Seg_IVARS(self);
        Integer *entry = (Integer*)Hash_Fetch(ivars->by_name, field);
        if (entry != NULL) {
            return (int32_t)Int_Get_Value(entry);
        }
    }
    return 0;
}
// Return the number associated with `field`, registering the field first
// if it is not yet present in `by_name`.
//
// A newly registered field is given the current size of `by_num` as its
// number; the name -> number mapping is stored in `by_name` and a clone of
// the name is pushed onto `by_num`.
int32_t
Seg_Add_Field_IMP(Segment *self, String *field) {
    SegmentIVARS *const ivars = Seg_IVARS(self);
    Integer *existing = (Integer*)Hash_Fetch(ivars->by_name, field);
    int32_t  assigned;

    // Already known: hand back the number recorded earlier.
    if (existing != NULL) {
        return (int32_t)Int_Get_Value(existing);
    }

    assigned = (int32_t)Vec_Get_Size(ivars->by_num);
    Hash_Store(ivars->by_name, field, (Obj*)Int_new(assigned));
    Vec_Push(ivars->by_num, (Obj*)Str_Clone(field));
    return assigned;
}
// Append the stored fields of one inverted document to the data stream and
// record where that document's record begins in the index stream.
//
// Record layout written to the data stream: the count of stored fields,
// then for each stored field its name followed by a value encoded
// according to the field type's primitive id (length-prefixed bytes for
// text/blob, a compressed integer for int32/int64, a raw float for
// float32/float64).  The index stream receives one 64-bit start offset per
// document.
//
// Throws ERR if `doc_id` differs from the index stream position divided by
// 8, if a text/blob value is larger than INT32_MAX bytes, or if a field
// type has an unrecognized primitive id.
void
DocWriter_Add_Inverted_Doc_IMP(DocWriter *self, Inverter *inverter,
                               int32_t doc_id) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    // S_lazy_init runs before ivars->ix_out is read, as in the original
    // statement order.
    OutStream *dat_out      = S_lazy_init(self);
    OutStream *ix_out       = ivars->ix_out;
    uint32_t   stored_count = 0;
    int64_t    record_start = OutStream_Tell(dat_out);
    int64_t    next_doc_id  = OutStream_Tell(ix_out) / 8;

    // Verify doc id.
    if (doc_id != next_doc_id) {
        THROW(ERR, "Expected doc id %i64 but got %i32", next_doc_id, doc_id);
    }

    // First pass: count the stored fields so the count can lead the record.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        if (FType_Stored(Inverter_Get_Type(inverter))) {
            stored_count++;
        }
    }
    OutStream_Write_CU32(dat_out, stored_count);

    // Second pass: emit name and value for each stored field.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        String    *field;
        Obj       *value;

        // Fields not marked as "stored" contribute nothing to the record.
        if (!FType_Stored(type)) {
            continue;
        }

        field = Inverter_Get_Field_Name(inverter);
        value = Inverter_Get_Value(inverter);
        Freezer_serialize_string(field, dat_out);

        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                const char *bytes = Str_Get_Ptr8((String*)value);
                size_t      len   = Str_Get_Size((String*)value);
                if (len > INT32_MAX) {
                    THROW(ERR, "Field %o over 2GB: %u64", field,
                          (uint64_t)len);
                }
                OutStream_Write_CU32(dat_out, (uint32_t)len);
                OutStream_Write_Bytes(dat_out, bytes, len);
                break;
            }
            case FType_BLOB: {
                const char *bytes = Blob_Get_Buf((Blob*)value);
                size_t      len   = Blob_Get_Size((Blob*)value);
                if (len > INT32_MAX) {
                    THROW(ERR, "Field %o over 2GB: %u64", field,
                          (uint64_t)len);
                }
                OutStream_Write_CU32(dat_out, (uint32_t)len);
                OutStream_Write_Bytes(dat_out, bytes, len);
                break;
            }
            case FType_INT32: {
                int32_t i32 = (int32_t)Int_Get_Value((Integer*)value);
                OutStream_Write_CI32(dat_out, i32);
                break;
            }
            case FType_INT64: {
                int64_t i64 = Int_Get_Value((Integer*)value);
                OutStream_Write_CI64(dat_out, i64);
                break;
            }
            case FType_FLOAT32: {
                float f32 = (float)Float_Get_Value((Float*)value);
                OutStream_Write_F32(dat_out, f32);
                break;
            }
            case FType_FLOAT64: {
                double f64 = Float_Get_Value((Float*)value);
                OutStream_Write_F64(dat_out, f64);
                break;
            }
            default:
                THROW(ERR, "Unrecognized type: %o", type);
        }
    }

    // Write file pointer.
    OutStream_Write_I64(ix_out, record_start);
}
// Write one value of primitive type `prim_id` to the output streams.
//
// Text and blob: the current data-stream position relative to `dat_start`
// is written to `ix_out` as a 64-bit offset, then the raw bytes (if any)
// are appended to `dat_out`.  Numeric types: the value is written to
// `dat_out` at fixed width and `ix_out` is left untouched.
//
// A NULL `val` still produces output: text/blob write only the offset with
// no payload bytes, and numeric types write zero.  Throws ERR for an
// unrecognized primitive id.
static void
S_write_val(Obj *val, int8_t prim_id, OutStream *ix_out, OutStream *dat_out,
            int64_t dat_start) {
    switch (prim_id & FType_PRIMITIVE_ID_MASK) {
        case FType_TEXT: {
            int64_t offset = OutStream_Tell(dat_out) - dat_start;
            OutStream_Write_I64(ix_out, offset);
            if (val) {
                String *text = (String*)val;
                OutStream_Write_Bytes(dat_out, Str_Get_Ptr8(text),
                                      Str_Get_Size(text));
            }
            break;
        }
        case FType_BLOB: {
            int64_t offset = OutStream_Tell(dat_out) - dat_start;
            OutStream_Write_I64(ix_out, offset);
            if (val) {
                Blob *bytes = (Blob*)val;
                OutStream_Write_Bytes(dat_out, Blob_Get_Buf(bytes),
                                      Blob_Get_Size(bytes));
            }
            break;
        }
        case FType_INT32: {
            int32_t i32 = val ? (int32_t)Int_Get_Value((Integer*)val) : 0;
            OutStream_Write_I32(dat_out, i32);
            break;
        }
        case FType_INT64: {
            int64_t i64 = val ? Int_Get_Value((Integer*)val) : 0;
            OutStream_Write_I64(dat_out, i64);
            break;
        }
        case FType_FLOAT32: {
            float f32 = val ? (float)Float_Get_Value((Float*)val) : 0.0f;
            OutStream_Write_F32(dat_out, f32);
            break;
        }
        case FType_FLOAT64: {
            double f64 = val ? Float_Get_Value((Float*)val) : 0.0;
            OutStream_Write_F64(dat_out, f64);
            break;
        }
        default:
            THROW(ERR, "Unrecognized primitive id: %i32", (int32_t)prim_id);
    }
}