/*
 * Write highlight data for a single inverted document.
 *
 * The current position of the data stream is appended to the index stream,
 * then the data stream receives a C32 count of highlightable fields followed
 * by, for each such field, its serialized name and its serialized
 * term-vector buffer (as produced by HLWriter_TV_Buf).
 *
 * @param self      The HighlightWriter.
 * @param inverter  Supplies the document's fields; it is iterated twice.
 * @param doc_id    Must equal the index stream position divided by 8,
 *                  otherwise an ERR is thrown before anything is written.
 */
void
HLWriter_Add_Inverted_Doc_IMP(HighlightWriter *self, Inverter *inverter,
                              int32_t doc_id) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);
    OutStream *data_stream  = S_lazy_init(self);
    OutStream *index_stream = ivars->ix_out;
    int64_t    record_start = OutStream_Tell(data_stream);
    uint32_t   field_count  = 0;
    int32_t    next_doc_id  = (int32_t)(OutStream_Tell(index_stream) / 8);

    // Refuse out-of-sequence doc ids.
    if (doc_id != next_doc_id) {
        THROW(ERR, "Expected doc id %i32 but got %i32", next_doc_id, doc_id);
    }

    // Record where this doc's data begins.
    OutStream_Write_I64(index_stream, record_start);

    // First pass: tally the highlightable fields so that the count can
    // precede the field data.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (!FType_Is_A(type, FULLTEXTTYPE)) {
            continue;
        }
        if (!FullTextType_Highlightable((FullTextType*)type)) {
            continue;
        }
        field_count += 1;
    }
    OutStream_Write_C32(data_stream, field_count);

    // Second pass: emit name and term-vector data for each of those fields.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (!FType_Is_A(type, FULLTEXTTYPE)) {
            continue;
        }
        if (!FullTextType_Highlightable((FullTextType*)type)) {
            continue;
        }

        String    *field_name = Inverter_Get_Field_Name(inverter);
        Inversion *inversion  = Inverter_Get_Inversion(inverter);
        ByteBuf   *tv_data    = HLWriter_TV_Buf(self, inversion);

        Freezer_serialize_string(field_name, data_stream);
        Freezer_serialize_bytebuf(tv_data, data_stream);

        // Drop the reference obtained from HLWriter_TV_Buf.
        DECREF(tv_data);
    }
}
/*
 * Write highlight data for a single inverted document.
 *
 * Appends the data stream's current position to the index stream, then
 * writes to the data stream a C32 count of highlightable fields followed by
 * each such field's serialized name and serialized term-vector buffer.
 *
 * Throws if doc_id does not equal the index stream position divided by 8.
 */
void
HLWriter_add_inverted_doc(HighlightWriter *self, Inverter *inverter,
                          i32_t doc_id) {
    OutStream *data_stream  = S_lazy_init(self);
    OutStream *index_stream = self->ix_out;
    i64_t      record_start = OutStream_Tell(data_stream);
    u32_t      field_count  = 0;
    i32_t      next_doc_id  = (i32_t)(OutStream_Tell(index_stream) / 8);

    /* Refuse out-of-sequence doc ids. */
    if (doc_id != next_doc_id) {
        THROW("Expected doc id %i32 but got %i32", next_doc_id, doc_id);
    }

    /* Record where this doc's data begins. */
    OutStream_Write_U64(index_stream, record_start);

    /* First pass: tally highlightable fields so the count can be written
     * ahead of the field data. */
    Inverter_Iter_Init(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);

        if (!(OBJ_IS_A(type, FULLTEXTTYPE))) {
            continue;
        }
        if (!(FullTextType_Highlightable(type))) {
            continue;
        }
        field_count += 1;
    }
    OutStream_Write_C32(data_stream, field_count);

    /* Second pass: emit name and term-vector data for each such field. */
    Inverter_Iter_Init(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        CharBuf   *field_name;
        Inversion *inversion;
        ByteBuf   *tv_data;

        if (!(OBJ_IS_A(type, FULLTEXTTYPE))) {
            continue;
        }
        if (!(FullTextType_Highlightable(type))) {
            continue;
        }

        field_name = Inverter_Get_Field_Name(inverter);
        inversion  = Inverter_Get_Inversion(inverter);
        tv_data    = HLWriter_TV_Buf(self, inversion);

        CB_Serialize(field_name, data_stream);
        BB_Serialize(tv_data, data_stream);

        /* Drop the reference obtained from HLWriter_TV_Buf. */
        DECREF(tv_data);
    }
}
/*
 * Copy the raw highlight records of an existing segment into this writer.
 *
 * For every source doc id from 1 through the reader's doc max, the current
 * data stream position is appended to the index stream and the record is
 * copied verbatim to the data stream.
 *
 * @param self     The HighlightWriter receiving the records.
 * @param reader   Segment to copy from; nothing happens if its doc max is 0.
 * @param doc_map  Optional.  When non-NULL, docs whose entry is 0 are
 *                 skipped; when NULL, every doc is copied.
 */
void
HLWriter_Add_Segment_IMP(HighlightWriter *self, SegReader *reader,
                         I32Array *doc_map) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);
    int32_t last_doc = SegReader_Doc_Max(reader);

    // An empty segment contributes nothing.
    if (last_doc == 0) {
        return;
    }

    DefaultHighlightReader *source = (DefaultHighlightReader*)CERTIFY(
        SegReader_Obtain(reader, Class_Get_Name(HIGHLIGHTREADER)),
        DEFAULTHIGHLIGHTREADER);
    OutStream *data_stream  = S_lazy_init(self);
    OutStream *index_stream = ivars->ix_out;
    ByteBuf   *record       = BB_new(0);

    for (int32_t doc = 1; doc <= last_doc; doc++) {
        // Copy the doc unless the map marks it as deleted.
        if (!doc_map || I32Arr_Get(doc_map, doc)) {
            // The file pointer goes to the index before the record is
            // written.
            int64_t record_start = OutStream_Tell(data_stream);
            OutStream_Write_I64(index_stream, record_start);

            // Transfer the record bytes unchanged.
            DefHLReader_Read_Record(source, doc, record);
            OutStream_Write_Bytes(data_stream, BB_Get_Buf(record),
                                  BB_Get_Size(record));

            // Empty the scratch buffer for the next record.
            BB_Set_Size(record, 0);
        }
    }

    DECREF(record);
}
/*
 * Copy the stored-document records of an existing segment into this
 * writer's output streams.
 *
 * For each retained source doc id from 1 through the reader's doc max, the
 * record is read into a scratch buffer and appended to the data stream, and
 * the position at which it started is then appended to the index stream.
 *
 * @param self     The DocWriter receiving the records.
 * @param reader   Segment to copy from; nothing happens if its doc max is 0.
 * @param doc_map  Consulted per source doc id; docs whose entry is 0 are
 *                 skipped.  May be NULL, in which case every doc is copied
 *                 (the same convention HLWriter_Add_Segment_IMP follows).
 */
void
DocWriter_Add_Segment_IMP(DocWriter *self, SegReader *reader,
                          I32Array *doc_map) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    int32_t doc_max = SegReader_Doc_Max(reader);

    if (doc_max == 0) {
        // Bail if the supplied segment is empty.
        return;
    }

    OutStream *const dat_out = S_lazy_init(self);
    OutStream *const ix_out  = ivars->ix_out;
    ByteBuf   *const buffer  = BB_new(0);
    DefaultDocReader *const doc_reader
        = (DefaultDocReader*)CERTIFY(
              SegReader_Obtain(reader, VTable_Get_Name(DOCREADER)),
              DEFAULTDOCREADER);

    // doc_max was fetched above, so there is no need to ask the reader
    // for it a second time.
    for (int32_t i = 1; i <= doc_max; i++) {
        // Skip deleted docs.  A NULL map means nothing has been deleted;
        // without this guard a NULL doc_map would be passed to I32Arr_Get.
        if (doc_map && !I32Arr_Get(doc_map, i)) {
            continue;
        }

        int64_t start = OutStream_Tell(dat_out);

        // Copy record over.
        DefDocReader_Read_Record(doc_reader, buffer, i);
        char   *buf  = BB_Get_Buf(buffer);
        size_t  size = BB_Get_Size(buffer);
        OutStream_Write_Bytes(dat_out, buf, size);

        // Write file pointer.
        OutStream_Write_I64(ix_out, start);
    }

    DECREF(buffer);
}
/*
 * Write the stored fields of a single inverted document.
 *
 * The data stream receives a C32 count of stored fields, then each stored
 * field's serialized name and serialized value.  Afterwards, the position at
 * which the record began is appended to the index stream.
 *
 * @param self      The DocWriter.
 * @param inverter  Supplies the document's fields; it is iterated twice.
 * @param doc_id    Must equal the index stream position divided by 8,
 *                  otherwise an ERR is thrown before anything is written.
 */
void
DocWriter_add_inverted_doc(DocWriter *self, Inverter *inverter,
                           int32_t doc_id) {
    OutStream *data_stream  = S_lazy_init(self);
    OutStream *index_stream = self->ix_out;
    uint32_t   stored_count = 0;
    int64_t    record_start = OutStream_Tell(data_stream);
    int64_t    next_doc_id  = OutStream_Tell(index_stream) / 8;

    // Refuse out-of-sequence doc ids.
    if (doc_id != next_doc_id) {
        THROW(ERR, "Expected doc id %i64 but got %i32", next_doc_id, doc_id);
    }

    // First pass: tally the stored fields so that the count can precede
    // the field data.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (!FType_Stored(type)) {
            continue;
        }
        stored_count += 1;
    }
    OutStream_Write_C32(data_stream, stored_count);

    // Second pass: serialize name and value of every stored field.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (!FType_Stored(type)) {
            continue;
        }

        CharBuf *field_name = Inverter_Get_Field_Name(inverter);
        Obj     *value      = Inverter_Get_Value(inverter);
        CB_Serialize(field_name, data_stream);
        Obj_Serialize(value, data_stream);
    }

    // The index entry is written last, after the record itself.
    OutStream_Write_I64(index_stream, record_start);
}
/*
 * Copy the raw highlight records of an existing segment into this writer.
 *
 * For every source doc id from 1 through the reader's doc max, the current
 * data stream position is appended to the index stream and the record is
 * copied verbatim to the data stream.  When doc_map is non-NULL, docs whose
 * entry is 0 are skipped; when it is NULL, every doc is copied.  A segment
 * whose doc max is 0 is ignored entirely.
 */
void
HLWriter_add_segment(HighlightWriter *self, SegReader *reader,
                     I32Array *doc_map) {
    i32_t last_doc = SegReader_Doc_Max(reader);

    /* Only do work when the supplied segment actually holds documents. */
    if (last_doc != 0) {
        DefaultHighlightReader *source = (DefaultHighlightReader*)
            ASSERT_IS_A(SegReader_Obtain(reader, HIGHLIGHTREADER.name),
                        DEFAULTHIGHLIGHTREADER);
        OutStream *data_stream  = S_lazy_init(self);
        OutStream *index_stream = self->ix_out;
        ByteBuf   *record       = BB_new(0);
        i32_t      doc          = 1;

        while (doc <= last_doc) {
            /* Copy the doc unless the map marks it as deleted. */
            if (!doc_map || I32Arr_Get(doc_map, doc)) {
                /* The file pointer goes to the index before the record is
                 * written. */
                OutStream_Write_U64(index_stream,
                                    OutStream_Tell(data_stream));

                /* Transfer the record bytes unchanged. */
                DefHLReader_Read_Record(source, doc, record);
                OutStream_Write_Bytes(data_stream, record->ptr,
                                      record->size);

                /* Empty the scratch buffer for the next record. */
                record->size = 0;
            }
            doc++;
        }

        DECREF(record);
    }
}
void DocWriter_Add_Inverted_Doc_IMP(DocWriter *self, Inverter *inverter, int32_t doc_id) { DocWriterIVARS *const ivars = DocWriter_IVARS(self); OutStream *dat_out = S_lazy_init(self); OutStream *ix_out = ivars->ix_out; uint32_t num_stored = 0; int64_t start = OutStream_Tell(dat_out); int64_t expected = OutStream_Tell(ix_out) / 8; // Verify doc id. if (doc_id != expected) { THROW(ERR, "Expected doc id %i64 but got %i32", expected, doc_id); } // Write the number of stored fields. Inverter_Iterate(inverter); while (Inverter_Next(inverter)) { FieldType *type = Inverter_Get_Type(inverter); if (FType_Stored(type)) { num_stored++; } } OutStream_Write_C32(dat_out, num_stored); Inverter_Iterate(inverter); while (Inverter_Next(inverter)) { // Only store fields marked as "stored". FieldType *type = Inverter_Get_Type(inverter); if (FType_Stored(type)) { String *field = Inverter_Get_Field_Name(inverter); Obj *value = Inverter_Get_Value(inverter); Freezer_serialize_string(field, dat_out); switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) { case FType_TEXT: { const char *buf = Str_Get_Ptr8((String*)value); size_t size = Str_Get_Size((String*)value); OutStream_Write_C32(dat_out, size); OutStream_Write_Bytes(dat_out, buf, size); break; } case FType_BLOB: { char *buf = BB_Get_Buf((ByteBuf*)value); size_t size = BB_Get_Size((ByteBuf*)value); OutStream_Write_C32(dat_out, size); OutStream_Write_Bytes(dat_out, buf, size); break; } case FType_INT32: { int32_t val = Int32_Get_Value((Integer32*)value); OutStream_Write_C32(dat_out, val); break; } case FType_INT64: { int64_t val = Int64_Get_Value((Integer64*)value); OutStream_Write_C64(dat_out, val); break; } case FType_FLOAT32: { float val = Float32_Get_Value((Float32*)value); OutStream_Write_F32(dat_out, val); break; } case FType_FLOAT64: { double val = Float64_Get_Value((Float64*)value); OutStream_Write_F64(dat_out, val); break; } default: THROW(ERR, "Unrecognized type: %o", type); } } } // Write file pointer. 
OutStream_Write_I64(ix_out, start); }