static Folder* S_folder_with_contents() { RAMFolder *folder = RAMFolder_new(seg_1); OutStream *foo_out = RAMFolder_Open_Out(folder, foo); OutStream *bar_out = RAMFolder_Open_Out(folder, bar); OutStream_Write_Bytes(foo_out, "foo", 3); OutStream_Write_Bytes(bar_out, "bar", 3); OutStream_Close(foo_out); OutStream_Close(bar_out); DECREF(foo_out); DECREF(bar_out); return (Folder*)folder; }
static Folder* S_folder_with_contents() { RAMFolder *folder = RAMFolder_new(seg_1); OutStream *foo_out = RAMFolder_Open_Out(folder, foo); OutStream *bar_out = RAMFolder_Open_Out(folder, bar); OutStream_Write_Bytes(foo_out, "foo", 3); OutStream_Write_Bytes(bar_out, "bar", 3); OutStream_Close(foo_out); OutStream_Close(bar_out); DECREF(foo_out); DECREF(bar_out); String *empty = SSTR_BLANK(); RAMFolder_Consolidate(folder, empty); return (Folder*)folder; }
// Serialize `string` to `outstream`: first its size (as reported by
// Str_Get_Size) via Write_C64, then that many bytes from Str_Get_Ptr8.
void
Freezer_serialize_string(String *string, OutStream *outstream) {
    size_t      num_bytes = Str_Get_Size(string);
    const char *content   = Str_Get_Ptr8(string);

    OutStream_Write_C64(outstream, num_bytes);
    OutStream_Write_Bytes(outstream, content, num_bytes);
}
// Callback taking an opaque `context`, which must point at a
// `struct lockfile_context`.  Writes the bytes of the context's `json`
// string to the context's `outstream`, then closes that stream.
static void
S_write_lockfile_json(void *context) {
    struct lockfile_context *ctx = (struct lockfile_context*)context;
    size_t json_len = Str_Get_Size(ctx->json);

    OutStream_Write_Bytes(ctx->outstream, Str_Get_Ptr8(ctx->json), json_len);
    OutStream_Close(ctx->outstream);
}
// Write out a deletions file for every segment reader whose entry in
// `ivars->updated` is set, then store this writer's metadata in the segment
// under the key "deletions".
//
// Rethrows the global error if an output file cannot be opened.
void
DefDelWriter_Finish_IMP(DefaultDeletionsWriter *self) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Folder *const folder = ivars->folder;

    for (uint32_t tick = 0, num_readers = VA_Get_Size(ivars->seg_readers);
         tick < num_readers;
         tick++
        ) {
        SegReader *seg_reader
            = (SegReader*)VA_Fetch(ivars->seg_readers, tick);

        // Nothing to write for segments whose deletions are unchanged.
        if (!ivars->updated[tick]) {
            continue;
        }

        BitVector *deldocs   = (BitVector*)VA_Fetch(ivars->bit_vecs, tick);
        int32_t    doc_max   = SegReader_Doc_Max(seg_reader);
        // One bit per doc id in 0..doc_max, rounded up to whole bytes.
        double     used      = (doc_max + 1) / 8.0;
        uint32_t   byte_size = (uint32_t)ceil(used);
        uint32_t   new_max   = byte_size * 8 - 1;
        String    *filename  = S_del_filename(self, seg_reader);
        OutStream *outstream = Folder_Open_Out(folder, filename);
        if (!outstream) {
            RETHROW(INCREF(Err_get_error()));
        }

        // Ensure that we have 1 bit for each doc in segment.
        BitVec_Grow(deldocs, new_max);

        // Write deletions data and clean up.
        OutStream_Write_Bytes(outstream,
                              (char*)BitVec_Get_Raw_Bits(deldocs),
                              byte_size);
        OutStream_Close(outstream);
        DECREF(outstream);
        DECREF(filename);
    }

    Seg_Store_Metadata_Utf8(ivars->segment, "deletions", 9,
                            (Obj*)DefDelWriter_Metadata(self));
}
// Exercise CFReader_Local_Open_In against three kinds of entry: `foo` from
// the folder built by S_folder_with_contents, `baz` written through
// CFReader_Open_Out during the test, and `stuff`, which is never created.
static void
test_Local_Open_In(TestBatchRunner *runner) {
    Folder             *backing = S_folder_with_contents();
    CompoundFileReader *reader  = CFReader_open(backing);
    InStream           *in;

    // Entry supplied by the fixture folder.
    in = CFReader_Local_Open_In(reader, foo);
    TEST_TRUE(runner, in != NULL, "Local_Open_In for virtual file");
    TEST_TRUE(runner,
              Str_Starts_With(InStream_Get_Filename(in),
                              CFReader_Get_Path(reader)),
              "InStream's path includes directory");
    DECREF(in);

    // Entry written through the reader itself.
    OutStream *out = CFReader_Open_Out(reader, baz);
    OutStream_Write_Bytes(out, "baz", 3);
    OutStream_Close(out);
    DECREF(out);
    in = CFReader_Local_Open_In(reader, baz);
    TEST_TRUE(runner, in != NULL,
              "Local_Open_In pass-through for real file");
    DECREF(in);

    // Missing entry: expect NULL plus a global error.
    Err_set_error(NULL);
    in = CFReader_Local_Open_In(reader, stuff);
    TEST_TRUE(runner, in == NULL,
              "Local_Open_In for non-existent file returns NULL");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Local_Open_In for non-existent file sets global error");

    DECREF(reader);
    DECREF(backing);
}
/* Append one record for the most recent term to the index stream (ix_out)
 * and record where that record starts in the index-of-index stream
 * (ixix_out).  `last_text`/`last_size` give the term's text; the remaining
 * fields come from self->last_tinfo.  Increments self->ix_count.
 */
static void
S_add_last_term_to_ix(LexiconWriter *self, char *last_text,
                      size_t last_size) {
    OutStream *const index_out  = self->ix_out;
    OutStream *const offset_out = self->ixix_out;
    TermInfo  *const tinfo      = self->last_tinfo;

    /* Write file pointer to index record. */
    OutStream_Write_U64(offset_out, OutStream_Tell(index_out));

    /* Write term text. */
    OutStream_Write_C32(index_out, last_size);
    OutStream_Write_Bytes(index_out, last_text, last_size);

    /* Write doc_freq. */
    OutStream_Write_C32(index_out, tinfo->doc_freq);

    /* Write postings file pointer. */
    OutStream_Write_C64(index_out, tinfo->post_filepos);

    /* Write skip file pointer (maybe). */
    if (tinfo->doc_freq >= self->skip_interval) {
        OutStream_Write_C64(index_out, tinfo->skip_filepos);
    }

    /* Write file pointer to main record. */
    OutStream_Write_C64(index_out, OutStream_Tell(self->dat_out));

    /* Keep track of how many terms have been added to lexicon.ix. */
    self->ix_count++;
}
// Serialize `blob` to `outstream`: its size via Write_CU64, followed by
// that many bytes from Blob_Get_Buf.
//
// Throws if the blob's size exceeds INT32_MAX.
void
Freezer_serialize_blob(Blob *blob, OutStream *outstream) {
    size_t num_bytes = Blob_Get_Size(blob);

    if (num_bytes > INT32_MAX) {
        THROW(ERR, "Can't serialize blob above 2GB: %u64",
              (uint64_t)num_bytes);
    }

    OutStream_Write_CU64(outstream, num_bytes);
    OutStream_Write_Bytes(outstream, Blob_Get_Buf(blob), num_bytes);
}
// Serialize `string` to `outstream`: its size via Write_CU64, followed by
// that many bytes from Str_Get_Ptr8.
//
// Throws if the string's size exceeds INT32_MAX.
void
Freezer_serialize_string(String *string, OutStream *outstream) {
    size_t      num_bytes = Str_Get_Size(string);
    const char *content   = Str_Get_Ptr8(string);

    if (num_bytes > INT32_MAX) {
        THROW(ERR, "Can't serialize string above 2GB: %u64",
              (uint64_t)num_bytes);
    }

    OutStream_Write_CU64(outstream, num_bytes);
    OutStream_Write_Bytes(outstream, content, num_bytes);
}
// Write `value` to `outstream` as a key frame: the stepper's own CharBuf
// (ivars->value) is made to mimic `value`, then its size is written via
// Write_C32 followed by its bytes.  The cached ivars->string is released
// and reset to NULL afterwards.
void
TextTermStepper_Write_Key_Frame_IMP(TextTermStepper *self,
                                    OutStream *outstream, Obj *value) {
    TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
    CharBuf *term_buf = (CharBuf*)ivars->value;

    CB_Mimic(term_buf, value);

    const char *text = CB_Get_Ptr8(term_buf);
    size_t      len  = CB_Get_Size(term_buf);
    OutStream_Write_C32(outstream, len);
    OutStream_Write_Bytes(outstream, text, len);

    // Invalidate string.
    DECREF(ivars->string);
    ivars->string = NULL;
}
/* Write this posting to `outstream`.
 *
 * The doc id is written as a delta against `last_doc_id`, shifted left one
 * bit.  When freq is 1 the low bit is set and freq is omitted; otherwise
 * the low bit is clear and freq follows as its own C32.  The auxiliary
 * bytes, which sit in `blob` right after the content, are appended last.
 */
void
RawPost_write_record(RawPosting *self, OutStream *outstream,
                     i32_t last_doc_id) {
    const u32_t delta_doc   = self->doc_id - last_doc_id;
    char *const aux_content = self->blob + self->content_len;
    const u32_t shifted     = delta_doc << 1;

    if (self->freq == 1) {
        OutStream_Write_C32(outstream, shifted | 1);
    }
    else {
        OutStream_Write_C32(outstream, shifted);
        OutStream_Write_C32(outstream, self->freq);
    }

    OutStream_Write_Bytes(outstream, aux_content, self->aux_len);
}
// Round-trip four bytes ("foo" plus its terminating NUL) through an
// OutStream and an InStream backed by the same RAMFile.
static void
test_Read_Write_Bytes(TestBatchRunner *runner) {
    RAMFile   *ram_file = RAMFile_new(NULL, false);
    OutStream *out      = OutStream_open((Obj*)ram_file);
    InStream  *in;
    char       scratch[4];

    // Length 4 includes the NUL so that `scratch` is a valid C string.
    OutStream_Write_Bytes(out, "foo", 4);
    OutStream_Close(out);

    in = InStream_open((Obj*)ram_file);
    InStream_Read_Bytes(in, scratch, 4);
    TEST_TRUE(runner, strcmp(scratch, "foo") == 0,
              "Read_Bytes Write_Bytes");

    DECREF(in);
    DECREF(out);
    DECREF(ram_file);
}
/* Write `posting` to this writer's outstream.
 *
 * The doc id is written as a delta against self->last_doc_id, shifted left
 * one bit.  When freq is 1 the low bit is set and freq is omitted;
 * otherwise the low bit is clear and freq follows as its own C32.  The
 * auxiliary bytes, which sit in the posting's blob right after the content,
 * are appended last.  self->last_doc_id is then updated.
 */
void
MatchPostWriter_write_posting(MatchPostingWriter *self,
                              RawPosting *posting) {
    OutStream *const out         = self->outstream;
    const int32_t    doc_id      = posting->doc_id;
    const uint32_t   delta_doc   = doc_id - self->last_doc_id;
    char *const      aux_content = posting->blob + posting->content_len;
    const uint32_t   shifted     = delta_doc << 1;

    if (posting->freq == 1) {
        OutStream_Write_C32(out, shifted | 1);
    }
    else {
        OutStream_Write_C32(out, shifted);
        OutStream_Write_C32(out, posting->freq);
    }

    OutStream_Write_Bytes(out, aux_content, posting->aux_len);

    self->last_doc_id = doc_id;
}
/* Encode `dump` as JSON and write the result to `path` within `folder`.
 *
 * Returns true on success.  Returns false, after adding a frame to the
 * global error, if either the JSON encoding or opening the output stream
 * fails.
 */
bool_t
Json_spew_json(Obj *dump, Folder *folder, const CharBuf *path) {
    CharBuf *encoded = Json_to_json(dump);
    if (!encoded) {
        ERR_ADD_FRAME(Err_get_error());
        return false;
    }

    OutStream *out = Folder_Open_Out(folder, path);
    if (!out) {
        ERR_ADD_FRAME(Err_get_error());
        DECREF(encoded);
        return false;
    }

    size_t encoded_len = CB_Get_Size(encoded);
    OutStream_Write_Bytes(out, CB_Get_Ptr8(encoded), encoded_len);
    OutStream_Close(out);

    DECREF(out);
    DECREF(encoded);
    return true;
}
// Write `posting` to this writer's outstream.
//
// The doc id is written as a delta against the writer's last_doc_id,
// shifted left one bit.  When freq is 1 the low bit is set and freq is
// omitted; otherwise the low bit is clear and freq follows as its own C32.
// The auxiliary bytes, which sit in the posting's blob right after the
// content, are appended last.  The writer's last_doc_id is then updated.
void
MatchPostWriter_Write_Posting_IMP(MatchPostingWriter *self,
                                  RawPosting *posting) {
    MatchPostingWriterIVARS *const ivars = MatchPostWriter_IVARS(self);
    RawPostingIVARS *const post = RawPost_IVARS(posting);

    OutStream *const out         = ivars->outstream;
    const int32_t    doc_id      = post->doc_id;
    const uint32_t   delta_doc   = doc_id - ivars->last_doc_id;
    char *const      aux_content = post->blob + post->content_len;
    const uint32_t   shifted     = delta_doc << 1;

    if (post->freq == 1) {
        OutStream_Write_C32(out, shifted | 1);
    }
    else {
        OutStream_Write_C32(out, shifted);
        OutStream_Write_C32(out, post->freq);
    }

    OutStream_Write_Bytes(out, aux_content, post->aux_len);

    ivars->last_doc_id = doc_id;
}
// Copy the raw highlight records of every surviving document in `reader`
// into this writer's data stream, writing the start offset of each record
// to the index stream first.
//
// `doc_map` may be NULL, in which case every document is copied; otherwise
// documents whose map entry is 0 are skipped.
void
HLWriter_Add_Segment_IMP(HighlightWriter *self, SegReader *reader,
                         I32Array *doc_map) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);
    int32_t doc_max = SegReader_Doc_Max(reader);

    // Bail if the supplied segment is empty.
    if (doc_max == 0) {
        return;
    }

    DefaultHighlightReader *hl_reader
        = (DefaultHighlightReader*)CERTIFY(
              SegReader_Obtain(reader, Class_Get_Name(HIGHLIGHTREADER)),
              DEFAULTHIGHLIGHTREADER);
    OutStream *dat_out = S_lazy_init(self);
    OutStream *ix_out  = ivars->ix_out;
    ByteBuf   *record  = BB_new(0);

    for (int32_t doc_id = 1; doc_id <= doc_max; doc_id++) {
        // Skip deleted docs.
        if (doc_map && !I32Arr_Get(doc_map, doc_id)) {
            continue;
        }

        // Write file pointer.
        OutStream_Write_I64(ix_out, OutStream_Tell(dat_out));

        // Copy the raw record, then empty the buffer for the next doc.
        DefHLReader_Read_Record(hl_reader, doc_id, record);
        OutStream_Write_Bytes(dat_out, BB_Get_Buf(record),
                              BB_Get_Size(record));
        BB_Set_Size(record, 0);
    }

    DECREF(record);
}
// Copy the raw stored-document records of every surviving document in
// `reader` into this writer's data stream, then write the start offset of
// each copied record to the index stream.
//
// `doc_map` may be NULL, in which case every document is copied; otherwise
// documents whose map entry is 0 are skipped.  Accepting a NULL map matches
// HLWriter_Add_Segment_IMP and avoids invoking I32Arr_Get on a NULL object.
void
DocWriter_Add_Segment_IMP(DocWriter *self, SegReader *reader,
                          I32Array *doc_map) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    int32_t doc_max = SegReader_Doc_Max(reader);

    // Bail if the supplied segment is empty.
    if (doc_max == 0) {
        return;
    }

    OutStream *const dat_out = S_lazy_init(self);
    OutStream *const ix_out  = ivars->ix_out;
    ByteBuf   *const buffer  = BB_new(0);
    DefaultDocReader *const doc_reader
        = (DefaultDocReader*)CERTIFY(
              SegReader_Obtain(reader, VTable_Get_Name(DOCREADER)),
              DEFAULTDOCREADER);

    for (int32_t i = 1; i <= doc_max; i++) {
        // Skip deleted docs; with no map supplied, nothing is deleted.
        if (doc_map && !I32Arr_Get(doc_map, i)) {
            continue;
        }

        int64_t start = OutStream_Tell(dat_out);

        // Copy record over.
        DefDocReader_Read_Record(doc_reader, buffer, i);
        char   *buf  = BB_Get_Buf(buffer);
        size_t  size = BB_Get_Size(buffer);
        OutStream_Write_Bytes(dat_out, buf, size);

        // Write file pointer.
        OutStream_Write_I64(ix_out, start);
    }

    DECREF(buffer);
}
/* Copy the raw highlight records of every surviving document in `reader`
 * into this writer's data stream, writing the start offset of each record
 * to the index stream first.
 *
 * `doc_map` may be NULL, in which case every document is copied; otherwise
 * documents whose map entry is 0 are skipped.
 */
void
HLWriter_add_segment(HighlightWriter *self, SegReader *reader,
                     I32Array *doc_map) {
    i32_t doc_max = SegReader_Doc_Max(reader);

    /* Bail if the supplied segment is empty. */
    if (doc_max == 0) {
        return;
    }

    DefaultHighlightReader *hl_reader
        = (DefaultHighlightReader*)ASSERT_IS_A(
              SegReader_Obtain(reader, HIGHLIGHTREADER.name),
              DEFAULTHIGHLIGHTREADER);
    OutStream *dat_out = S_lazy_init(self);
    OutStream *ix_out  = self->ix_out;
    i32_t      doc_id;
    ByteBuf   *record  = BB_new(0);

    for (doc_id = 1; doc_id <= doc_max; doc_id++) {
        /* Skip deleted docs. */
        if (doc_map && !I32Arr_Get(doc_map, doc_id)) {
            continue;
        }

        /* Write file pointer. */
        OutStream_Write_U64(ix_out, OutStream_Tell(dat_out));

        /* Copy the raw record, then empty the buffer for the next doc. */
        DefHLReader_Read_Record(hl_reader, doc_id, record);
        OutStream_Write_Bytes(dat_out, record->ptr, record->size);
        record->size = 0;
    }

    DECREF(record);
}
// Exercise CFReader_Local_Open_FileHandle against three kinds of entry:
// `baz` written through CFReader_Open_Out during the test, `stuff`, which
// is never created, and `foo` from the folder built by
// S_folder_with_contents.
static void
test_Local_Open_FileHandle(TestBatchRunner *runner) {
    Folder             *backing = S_folder_with_contents();
    CompoundFileReader *reader  = CFReader_open(backing);
    FileHandle         *handle;

    // Entry written through the reader itself.
    OutStream *out = CFReader_Open_Out(reader, baz);
    OutStream_Write_Bytes(out, "baz", 3);
    OutStream_Close(out);
    DECREF(out);

    handle = CFReader_Local_Open_FileHandle(reader, baz, FH_READ_ONLY);
    TEST_TRUE(runner, handle != NULL,
              "Local_Open_FileHandle pass-through for real file");
    DECREF(handle);

    // Missing entry: expect NULL plus a global error.
    Err_set_error(NULL);
    handle = CFReader_Local_Open_FileHandle(reader, stuff, FH_READ_ONLY);
    TEST_TRUE(runner, handle == NULL,
              "Local_Open_FileHandle for non-existent file returns NULL");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Local_Open_FileHandle for non-existent file sets global error");

    // Entry supplied by the fixture folder: also expect NULL plus an error.
    Err_set_error(NULL);
    handle = CFReader_Local_Open_FileHandle(reader, foo, FH_READ_ONLY);
    TEST_TRUE(runner, handle == NULL,
              "Local_Open_FileHandle for virtual file returns NULL");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Local_Open_FileHandle for virtual file sets global error");

    DECREF(reader);
    DECREF(backing);
}
// Drain the sorted elements available from SortFieldWriter_Fetch, writing
// each distinct value once (via S_write_val) and assigning every document
// the ordinal of its value.  The ordinals are then packed with S_write_ord
// and written to `ord_out`.
//
// Documents which never appear among the fetched elements receive the NULL
// ord, which is allocated only when ivars->count differs from the segment's
// doc count.  Sets ivars->null_ord (when applicable) and ivars->ord_width.
//
// Throws if a fetched element carries a doc id greater than doc_max.
// Returns the cardinality (highest ord assigned, plus one).
static int32_t
S_write_files(SortFieldWriter *self, OutStream *ord_out, OutStream *ix_out,
              OutStream *dat_out) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    int8_t   prim_id   = ivars->prim_id;
    int32_t  doc_max   = (int32_t)Seg_Get_Count(ivars->segment);
    bool     has_nulls = ivars->count != doc_max;
    size_t   size      = (doc_max + 1) * sizeof(int32_t);
    int32_t *ords      = (int32_t*)MALLOCATE(size);
    int32_t  ord       = 0;
    int64_t  dat_start = OutStream_Tell(dat_out);

    // Assign -1 as a stand-in for the NULL ord.
    for (int32_t doc_id = 0; doc_id <= doc_max; doc_id++) {
        ords[doc_id] = -1;
    }

    // Grab the first item and record its ord.  Add a dummy ord for invalid
    // doc id 0.
    SFWriterElem *elem = (SFWriterElem*)SortFieldWriter_Fetch(self);
    SFWriterElemIVARS *elem_ivars = SFWriterElem_IVARS(elem);
    if (elem_ivars->doc_id > doc_max) {
        THROW(ERR, "doc_id %i32 greater than doc_max %i32",
              elem_ivars->doc_id, doc_max);
    }
    ords[elem_ivars->doc_id] = ord;
    ords[0] = 0;

    // Build array of ords, write non-NULL sorted values.
    Obj *last_val = INCREF(elem_ivars->value);
    S_write_val(elem_ivars->value, prim_id, ix_out, dat_out, dat_start);
    DECREF(elem);
    while ((elem = (SFWriterElem*)SortFieldWriter_Fetch(self)) != NULL) {
        elem_ivars = SFWriterElem_IVARS(elem);

        // Pointer-identical values need no comparison; otherwise a non-zero
        // comparison result starts a new ord.
        if (elem_ivars->value != last_val) {
            int32_t comparison
                = FType_Compare_Values(ivars->type, elem_ivars->value,
                                       last_val);
            if (comparison != 0) {
                ord++;
                S_write_val(elem_ivars->value, prim_id, ix_out, dat_out,
                            dat_start);
            }
            DECREF(last_val);
            last_val = INCREF(elem_ivars->value);
        }

        if (elem_ivars->doc_id > doc_max) {
            THROW(ERR, "doc_id %i32 greater than doc_max %i32",
                  elem_ivars->doc_id, doc_max);
        }
        ords[elem_ivars->doc_id] = ord;
        DECREF(elem);
    }
    DECREF(last_val);

    // If there are NULL values, write one now and record the NULL ord.
    if (has_nulls) {
        S_write_val(NULL, prim_id, ix_out, dat_out, dat_start);
        ord++;
        ivars->null_ord = ord;
    }
    int32_t null_ord = ivars->null_ord;

    // Write one extra file pointer so that we can always derive length.
    if (ivars->var_width) {
        OutStream_Write_I64(ix_out, OutStream_Tell(dat_out) - dat_start);
    }

    // Calculate cardinality and ord width.
    int32_t cardinality = ord + 1;
    ivars->ord_width = S_calc_width(cardinality);
    int32_t ord_width = ivars->ord_width;

    // Write ords.
    const double BITS_PER_BYTE = 8.0;
    double bytes_per_doc = ord_width / BITS_PER_BYTE;
    double byte_count    = ceil((doc_max + 1) * bytes_per_doc);
    char *compressed_ords
        = (char*)CALLOCATE((size_t)byte_count, sizeof(char));
    for (int32_t doc_id = 0; doc_id <= doc_max; doc_id++) {
        int32_t real_ord = ords[doc_id] == -1 ? null_ord : ords[doc_id];
        S_write_ord(compressed_ords, ord_width, doc_id, real_ord);
    }
    OutStream_Write_Bytes(ord_out, compressed_ords, (size_t)byte_count);

    FREEMEM(compressed_ords);
    FREEMEM(ords);

    return cardinality;
}
// Write the stored fields of the document held by `inverter` to the data
// stream, then write the record's start offset to the index stream.
//
// The record consists of the number of stored fields followed, for each
// stored field, by its serialized name and a value encoded according to the
// field's primitive type.
//
// Throws if `doc_id` does not equal the index stream's current position
// divided by 8, or if a stored field has an unrecognized primitive type.
void
DocWriter_Add_Inverted_Doc_IMP(DocWriter *self, Inverter *inverter,
                               int32_t doc_id) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    OutStream *dat_out    = S_lazy_init(self);
    OutStream *ix_out     = ivars->ix_out;
    uint32_t   num_stored = 0;
    int64_t    start      = OutStream_Tell(dat_out);
    int64_t    expected   = OutStream_Tell(ix_out) / 8;

    // Verify doc id.
    if (doc_id != expected) {
        THROW(ERR, "Expected doc id %i64 but got %i32", expected, doc_id);
    }

    // Write the number of stored fields.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (FType_Stored(type)) {
            num_stored++;
        }
    }
    OutStream_Write_C32(dat_out, num_stored);

    // Second pass: write each stored field's name and value.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        // Only store fields marked as "stored".
        FieldType *type = Inverter_Get_Type(inverter);
        if (!FType_Stored(type)) {
            continue;
        }

        String *field = Inverter_Get_Field_Name(inverter);
        Obj    *value = Inverter_Get_Value(inverter);
        Freezer_serialize_string(field, dat_out);

        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                const char *text = Str_Get_Ptr8((String*)value);
                size_t      len  = Str_Get_Size((String*)value);
                OutStream_Write_C32(dat_out, len);
                OutStream_Write_Bytes(dat_out, text, len);
                break;
            }
            case FType_BLOB: {
                char   *bytes = BB_Get_Buf((ByteBuf*)value);
                size_t  len   = BB_Get_Size((ByteBuf*)value);
                OutStream_Write_C32(dat_out, len);
                OutStream_Write_Bytes(dat_out, bytes, len);
                break;
            }
            case FType_INT32: {
                int32_t i32 = Int32_Get_Value((Integer32*)value);
                OutStream_Write_C32(dat_out, i32);
                break;
            }
            case FType_INT64: {
                int64_t i64 = Int64_Get_Value((Integer64*)value);
                OutStream_Write_C64(dat_out, i64);
                break;
            }
            case FType_FLOAT32: {
                float f32 = Float32_Get_Value((Float32*)value);
                OutStream_Write_F32(dat_out, f32);
                break;
            }
            case FType_FLOAT64: {
                double f64 = Float64_Get_Value((Float64*)value);
                OutStream_Write_F64(dat_out, f64);
                break;
            }
            default:
                THROW(ERR, "Unrecognized type: %o", type);
        }
    }

    // Write file pointer.
    OutStream_Write_I64(ix_out, start);
}
// Serialize `blob` to `outstream`: its size via Write_C32, followed by that
// many bytes from Blob_Get_Buf.
void
Freezer_serialize_blob(Blob *blob, OutStream *outstream) {
    size_t num_bytes = Blob_Get_Size(blob);

    OutStream_Write_C32(outstream, num_bytes);
    OutStream_Write_Bytes(outstream, Blob_Get_Buf(blob), num_bytes);
}
// Write a single sort value, dispatching on the primitive type encoded in
// `prim_id`.
//
// For TEXT and BLOB, the current offset into `dat_out` (relative to
// `dat_start`) is written to `ix_out`, followed by the value's bytes in
// `dat_out`.  Numeric types are written to `dat_out` only.
//
// A NULL `val` writes a placeholder: only the offset for TEXT/BLOB, or a
// zero of the matching width for numeric types.
//
// Throws on an unrecognized primitive id.
static void
S_write_val(Obj *val, int8_t prim_id, OutStream *ix_out, OutStream *dat_out,
            int64_t dat_start) {
    // NULL value: emit the placeholder and finish.
    if (!val) {
        switch (prim_id & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT:
            case FType_BLOB: {
                    int64_t offset = OutStream_Tell(dat_out) - dat_start;
                    OutStream_Write_I64(ix_out, offset);
                }
                break;
            case FType_INT32:
                OutStream_Write_I32(dat_out, 0);
                break;
            case FType_INT64:
                OutStream_Write_I64(dat_out, 0);
                break;
            case FType_FLOAT64:
                OutStream_Write_F64(dat_out, 0.0);
                break;
            case FType_FLOAT32:
                OutStream_Write_F32(dat_out, 0.0f);
                break;
            default:
                THROW(ERR, "Unrecognized primitive id: %i32",
                      (int32_t)prim_id);
        }
        return;
    }

    switch (prim_id & FType_PRIMITIVE_ID_MASK) {
        case FType_TEXT: {
                String *text   = (String*)val;
                int64_t offset = OutStream_Tell(dat_out) - dat_start;
                OutStream_Write_I64(ix_out, offset);
                OutStream_Write_Bytes(dat_out, Str_Get_Ptr8(text),
                                      Str_Get_Size(text));
                break;
            }
        case FType_BLOB: {
                Blob   *bytes  = (Blob*)val;
                int64_t offset = OutStream_Tell(dat_out) - dat_start;
                OutStream_Write_I64(ix_out, offset);
                OutStream_Write_Bytes(dat_out, Blob_Get_Buf(bytes),
                                      Blob_Get_Size(bytes));
                break;
            }
        case FType_INT32: {
                int32_t narrowed = (int32_t)Int_Get_Value((Integer*)val);
                OutStream_Write_I32(dat_out, narrowed);
                break;
            }
        case FType_INT64: {
                int64_t wide = Int_Get_Value((Integer*)val);
                OutStream_Write_I64(dat_out, wide);
                break;
            }
        case FType_FLOAT32: {
                float single = (float)Float_Get_Value((Float*)val);
                OutStream_Write_F32(dat_out, single);
                break;
            }
        case FType_FLOAT64: {
                double dbl = Float_Get_Value((Float*)val);
                OutStream_Write_F64(dat_out, dbl);
                break;
            }
        default:
            THROW(ERR, "Unrecognized primitive id: %i32", (int32_t)prim_id);
    }
}