// Wrap up the field identified by `field_num`: record the term counts in the
// metadata hashes, close and release the three output streams, and release
// the term stepper.
void LexWriter_finish_field(LexiconWriter *self, int32_t field_num) {
    CharBuf *field_name = Seg_Field_Name(self->segment, field_num);

    // Store count of terms for this field as metadata.
    CharBuf *term_total = CB_newf("%i32", self->count);
    Hash_Store(self->counts, (Obj*)field_name, (Obj*)term_total);
    CharBuf *ix_total = CB_newf("%i32", self->ix_count);
    Hash_Store(self->ix_counts, (Obj*)field_name, (Obj*)ix_total);

    // Close all streams before releasing any of them.
    OutStream_Close(self->dat_out);
    OutStream_Close(self->ix_out);
    OutStream_Close(self->ixix_out);

    // Drop our references and reset the members.
    DECREF(self->dat_out);
    self->dat_out = NULL;
    DECREF(self->ix_out);
    self->ix_out = NULL;
    DECREF(self->ixix_out);
    self->ixix_out = NULL;

    // Release the term stepper.
    DECREF(self->term_stepper);
    self->term_stepper = NULL;
}
static Folder* S_folder_with_contents() { RAMFolder *folder = RAMFolder_new(seg_1); OutStream *foo_out = RAMFolder_Open_Out(folder, foo); OutStream *bar_out = RAMFolder_Open_Out(folder, bar); OutStream_Write_Bytes(foo_out, "foo", 3); OutStream_Write_Bytes(bar_out, "bar", 3); OutStream_Close(foo_out); OutStream_Close(bar_out); DECREF(foo_out); DECREF(bar_out); return (Folder*)folder; }
// Finish writing document data.  Does nothing when no data stream exists;
// otherwise terminates the index, closes both streams, and stores the
// writer's metadata in the segment under "documents".
void DocWriter_finish(DocWriter *self) {
    if (!self->dat_out) {
        return;
    }

    // Write one final file pointer, so that we can derive the length of
    // the last record.
    int64_t final_pos = OutStream_Tell(self->dat_out);
    OutStream_Write_I64(self->ix_out, final_pos);

    // Close down output streams.
    OutStream_Close(self->dat_out);
    OutStream_Close(self->ix_out);

    Seg_Store_Metadata_Str(self->segment, "documents", 9,
                           (Obj*)DocWriter_Metadata(self));
}
static Folder* S_folder_with_contents() { RAMFolder *folder = RAMFolder_new(seg_1); OutStream *foo_out = RAMFolder_Open_Out(folder, foo); OutStream *bar_out = RAMFolder_Open_Out(folder, bar); OutStream_Write_Bytes(foo_out, "foo", 3); OutStream_Write_Bytes(bar_out, "bar", 3); OutStream_Close(foo_out); OutStream_Close(bar_out); DECREF(foo_out); DECREF(bar_out); String *empty = SSTR_BLANK(); RAMFolder_Consolidate(folder, empty); return (Folder*)folder; }
/* Copy the contents of every file listed by an FSFolder opened at
 * self->path into this RAMFolder.  Throws if either the source or the
 * destination stream for a file cannot be opened. */
static void S_read_fsfolder(RAMFolder *self) {
    u32_t tick = 0;
    u32_t num_files;

    /* Open an FSFolder for reading. */
    FSFolder *fs_folder = FSFolder_new(self->path);
    VArray   *listing   = FSFolder_List(fs_folder);

    /* Copy every file in the FSFolder into RAM. */
    num_files = VA_Get_Size(listing);
    while (tick < num_files) {
        CharBuf   *name = (CharBuf*)VA_Fetch(listing, tick);
        InStream  *in   = FSFolder_Open_In(fs_folder, name);
        OutStream *out  = RAMFolder_Open_Out(self, name);

        /* Both streams are opened before either is checked. */
        if (!in) {
            THROW("Can't open %o", name);
        }
        if (!out) {
            THROW("Can't open %o", name);
        }

        OutStream_Absorb(out, in);
        OutStream_Close(out);
        InStream_Close(in);
        DECREF(out);
        DECREF(in);

        tick++;
    }

    /* Clean up. */
    DECREF(listing);
    FSFolder_Close(fs_folder);
    DECREF(fs_folder);
}
// Finish writing highlight data.  Does nothing when no data stream exists;
// otherwise terminates the index, closes both streams, and stores the
// writer's metadata in the segment under "highlight".
void HLWriter_Finish_IMP(HighlightWriter *self) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);

    if (!ivars->dat_out) {
        return;
    }

    // Write one final file pointer, so that we can derive the length of
    // the last record.
    int64_t final_pos = OutStream_Tell(ivars->dat_out);
    OutStream_Write_I64(ivars->ix_out, final_pos);

    // Close down the output streams.
    OutStream_Close(ivars->dat_out);
    OutStream_Close(ivars->ix_out);

    Seg_Store_Metadata_Utf8(ivars->segment, "highlight", 9,
                            (Obj*)HLWriter_Metadata(self));
}
// For every segment reader flagged as updated, write that segment's
// deletions bit vector out to its own file; afterwards store the writer's
// metadata in the segment under "deletions".
void DefDelWriter_Finish_IMP(DefaultDeletionsWriter *self) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Folder *const folder = ivars->folder;

    for (uint32_t tick = 0, num_readers = VA_Get_Size(ivars->seg_readers);
         tick < num_readers;
         tick++
        ) {
        SegReader *seg_reader
            = (SegReader*)VA_Fetch(ivars->seg_readers, tick);

        // Only segments whose deletions changed get a new file.
        if (!ivars->updated[tick]) {
            continue;
        }

        BitVector *deldocs = (BitVector*)VA_Fetch(ivars->bit_vecs, tick);
        int32_t    doc_max = SegReader_Doc_Max(seg_reader);

        // Bytes needed to hold (doc_max + 1) bits, rounded up, and the
        // highest bit index which fits in that many bytes.
        uint32_t byte_size = (uint32_t)ceil((doc_max + 1) / 8.0);
        uint32_t new_max   = byte_size * 8 - 1;

        String    *filename = S_del_filename(self, seg_reader);
        OutStream *out      = Folder_Open_Out(folder, filename);
        if (!out) {
            RETHROW(INCREF(Err_get_error()));
        }

        // Ensure that we have 1 bit for each doc in segment.
        BitVec_Grow(deldocs, new_max);

        // Write deletions data and clean up.
        OutStream_Write_Bytes(out, (char*)BitVec_Get_Raw_Bits(deldocs),
                              byte_size);
        OutStream_Close(out);
        DECREF(out);
        DECREF(filename);
    }

    Seg_Store_Metadata_Utf8(ivars->segment, "deletions", 9,
                            (Obj*)DefDelWriter_Metadata(self));
}
/* Finish writing highlight data.  Does nothing when no data stream exists;
 * otherwise terminates the index, closes both streams, and stores the
 * writer's metadata in the segment under "highlight". */
void HLWriter_finish(HighlightWriter *self) {
    i64_t final_pos;

    if (!self->dat_out) {
        return;
    }

    /* Write one final file pointer, so that we can derive the length of
     * the last record. */
    final_pos = OutStream_Tell(self->dat_out);
    OutStream_Write_U64(self->ix_out, final_pos);

    /* Close down the output streams. */
    OutStream_Close(self->dat_out);
    OutStream_Close(self->ix_out);

    Seg_Store_Metadata_Str(self->segment, "highlight", 9,
                           (Obj*)HLWriter_Metadata(self));
}
// Callback taking an opaque pointer to a `struct lockfile_context`: writes
// the context's JSON string to the context's outstream and closes the
// stream.
static void S_write_lockfile_json(void *context) {
    struct lockfile_context *ctx = (struct lockfile_context*)context;
    size_t num_bytes = Str_Get_Size(ctx->json);

    OutStream_Write_Bytes(ctx->outstream, Str_Get_Ptr8(ctx->json),
                          num_bytes);
    OutStream_Close(ctx->outstream);
}
// Exercise CFReader_Local_Open_In in three situations: a virtual file, a
// file written through the reader itself, and a file which does not exist.
static void test_Local_Open_In(TestBatchRunner *runner) {
    Folder *backing = S_folder_with_contents();
    CompoundFileReader *reader = CFReader_open(backing);

    // Virtual file.
    InStream *in = CFReader_Local_Open_In(reader, foo);
    TEST_TRUE(runner, in != NULL, "Local_Open_In for virtual file");
    TEST_TRUE(runner,
              Str_Starts_With(InStream_Get_Filename(in),
                              CFReader_Get_Path(reader)),
              "InStream's path includes directory");
    DECREF(in);

    // Real file, created through the reader.
    OutStream *out = CFReader_Open_Out(reader, baz);
    OutStream_Write_Bytes(out, "baz", 3);
    OutStream_Close(out);
    DECREF(out);
    in = CFReader_Local_Open_In(reader, baz);
    TEST_TRUE(runner, in != NULL,
              "Local_Open_In pass-through for real file");
    DECREF(in);

    // Non-existent file: clear the global error first so we can tell that
    // the call sets it.
    Err_set_error(NULL);
    in = CFReader_Local_Open_In(reader, stuff);
    TEST_TRUE(runner, in == NULL,
              "Local_Open_In for non-existent file returns NULL");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Local_Open_In for non-existent file sets global error");

    DECREF(reader);
    DECREF(backing);
}
// Check InStream buffer-filling behavior against a RAMFile holding 1023 'x'
// bytes followed by 'y' and 'z'.
static void test_refill(TestBatchRunner *runner) {
    RAMFile   *ram_file = RAMFile_new(NULL, false);
    OutStream *out      = OutStream_open((Obj*)ram_file);
    InStream  *in;
    InStreamIVARS *ivars;
    char sink[5];

    // Populate the file.
    int32_t written = 0;
    while (written < 1023) {
        OutStream_Write_U8(out, 'x');
        written++;
    }
    OutStream_Write_U8(out, 'y');
    OutStream_Write_U8(out, 'z');
    OutStream_Close(out);

    // Refill() from a standing start.
    in    = InStream_open((Obj*)ram_file);
    ivars = InStream_IVARS(in);
    InStream_Refill(in);
    TEST_INT_EQ(runner, ivars->limit - ivars->buf, IO_STREAM_BUF_SIZE,
                "Refill");
    TEST_INT_EQ(runner, (long)InStream_Tell(in), 0,
                "Correct file pos after standing-start Refill()");
    DECREF(in);

    // Fill() from a standing start.
    in    = InStream_open((Obj*)ram_file);
    ivars = InStream_IVARS(in);
    InStream_Fill(in, 30);
    TEST_INT_EQ(runner, ivars->limit - ivars->buf, 30, "Fill()");
    TEST_INT_EQ(runner, (long)InStream_Tell(in), 0,
                "Correct file pos after standing-start Fill()");
    DECREF(in);

    // A small read on a fresh stream.
    in    = InStream_open((Obj*)ram_file);
    ivars = InStream_IVARS(in);
    InStream_Read_Bytes(in, sink, 5);
    TEST_INT_EQ(runner, ivars->limit - ivars->buf, IO_STREAM_BUF_SIZE - 5,
                "small read triggers refill");
    DECREF(in);

    // Seeking within the buffer, then reading past it.
    in    = InStream_open((Obj*)ram_file);
    ivars = InStream_IVARS(in);
    TEST_INT_EQ(runner, InStream_Read_U8(in), 'x', "Read_U8");
    InStream_Seek(in, 1023);
    TEST_INT_EQ(runner, (long)FileWindow_IVARS(ivars->window)->offset, 0,
                "no unnecessary refill on Seek");
    TEST_INT_EQ(runner, (long)InStream_Tell(in), 1023, "Seek/Tell");
    TEST_INT_EQ(runner, InStream_Read_U8(in), 'y',
                "correct data after in-buffer Seek()");
    TEST_INT_EQ(runner, InStream_Read_U8(in), 'z', "automatic Refill");
    TEST_TRUE(runner, (FileWindow_IVARS(ivars->window)->offset != 0),
              "refilled");

    DECREF(in);
    DECREF(out);
    DECREF(ram_file);
}
// Write the final sort files for this field: ".ord" and ".dat" always, plus
// ".ix" when the field is variable-width.  Returns 0 without creating any
// file when Peek() yields nothing; otherwise returns the value produced by
// S_write_files().
int32_t SortFieldWriter_Finish_IMP(SortFieldWriter *self) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);

    // Bail if there's no data.
    if (!SortFieldWriter_Peek(self)) {
        return 0;
    }

    int32_t field_num = ivars->field_num;
    Folder *folder    = PolyReader_Get_Folder(ivars->polyreader);
    String *seg_name  = Seg_Get_Name(ivars->segment);

    // Open the ords stream.
    String *ord_path = Str_newf("%o/sort-%i32.ord", seg_name, field_num);
    OutStream *ord_out = Folder_Open_Out(folder, ord_path);
    DECREF(ord_path);
    if (!ord_out) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Open the index stream, for variable-width fields only.
    OutStream *ix_out = NULL;
    if (ivars->var_width) {
        String *ix_path = Str_newf("%o/sort-%i32.ix", seg_name, field_num);
        ix_out = Folder_Open_Out(folder, ix_path);
        DECREF(ix_path);
        if (!ix_out) {
            RETHROW(INCREF(Err_get_error()));
        }
    }

    // Open the data stream.
    String *dat_path = Str_newf("%o/sort-%i32.dat", seg_name, field_num);
    OutStream *dat_out = Folder_Open_Out(folder, dat_path);
    DECREF(dat_path);
    if (!dat_out) {
        RETHROW(INCREF(Err_get_error()));
    }

    int32_t result = S_write_files(self, ord_out, ix_out, dat_out);

    // Close streams.
    OutStream_Close(ord_out);
    if (ix_out != NULL) {
        OutStream_Close(ix_out);
    }
    OutStream_Close(dat_out);

    // Release streams.
    DECREF(dat_out);
    DECREF(ix_out);
    DECREF(ord_out);

    return result;
}
// Check InStream_Clone and InStream_Reopen against a RAMFile holding the 26
// bytes 'a' + 0 through 'a' + 25.
static void test_Clone_and_Reopen(TestBatchRunner *runner) {
    String *name_foo = SSTR_WRAP_C("foo");
    String *name_bar = SSTR_WRAP_C("bar");
    RAMFile   *ram_file = RAMFile_new(NULL, false);
    OutStream *out      = OutStream_open((Obj*)ram_file);

    // Populate the file.
    uint8_t offset = 0;
    while (offset < 26) {
        OutStream_Write_U8(out, 'a' + offset);
        offset++;
    }
    OutStream_Close(out);

    // Open the original stream and move it off the start of the file.
    RAMFileHandle *handle = RAMFH_open(name_foo, FH_READ_ONLY, ram_file);
    InStream *original = InStream_open((Obj*)handle);
    InStream_Seek(original, 1);
    TEST_TRUE(runner,
              Str_Equals(InStream_Get_Filename(original), (Obj*)name_foo),
              "Get_Filename");

    // Clone.
    InStream *duplicate = InStream_Clone(original);
    TEST_TRUE(runner,
              Str_Equals(InStream_Get_Filename(duplicate), (Obj*)name_foo),
              "Clones have same filename");
    TEST_TRUE(runner,
              InStream_Length(original) == InStream_Length(duplicate),
              "Clones have same length");
    TEST_TRUE(runner,
              InStream_Read_U8(original) == InStream_Read_U8(duplicate),
              "Clones start at same file position");

    // Reopen with a new filename, offset 25 and length 1.
    InStream *window = InStream_Reopen(original, name_bar, 25, 1);
    TEST_TRUE(runner,
              Str_Equals(InStream_Get_Filename(window), (Obj*)name_bar),
              "Reopened InStreams take new filename");
    TEST_TRUE(runner, InStream_Read_U8(window) == 'z',
              "Reopened stream starts at supplied offset");
    TEST_TRUE(runner, InStream_Length(window) == 1,
              "Reopened stream uses supplied length");
    TEST_TRUE(runner, InStream_Tell(window) == 1,
              "Tell() uses supplied offset for reopened stream");
    InStream_Seek(window, 0);
    TEST_TRUE(runner, InStream_Read_U8(window) == 'z',
              "Seek() uses supplied offset for reopened stream");

    DECREF(window);
    DECREF(duplicate);
    DECREF(original);
    DECREF(out);
    DECREF(handle);
    DECREF(ram_file);
}
// Round-trip four bytes ("foo" plus its terminating NUL) through an
// OutStream and an InStream sharing one RAMFile.
static void test_Read_Write_Bytes(TestBatchRunner *runner) {
    RAMFile   *ram_file = RAMFile_new(NULL, false);
    OutStream *out      = OutStream_open((Obj*)ram_file);
    char readback[4];

    // Length 4 includes the string's NUL terminator, which the strcmp()
    // below relies on.
    OutStream_Write_Bytes(out, "foo", 4);
    OutStream_Close(out);

    InStream *in = InStream_open((Obj*)ram_file);
    InStream_Read_Bytes(in, readback, 4);
    TEST_TRUE(runner, strcmp(readback, "foo") == 0,
              "Read_Bytes Write_Bytes");

    DECREF(in);
    DECREF(out);
    DECREF(ram_file);
}
// Encode `dump` as JSON and write it to `path` inside `folder`.  Returns
// true on success.  Returns false, after adding a frame to the global
// error, when either encoding or opening the output file fails.
bool_t Json_spew_json(Obj *dump, Folder *folder, const CharBuf *path) {
    CharBuf *encoded = Json_to_json(dump);
    if (encoded == NULL) {
        ERR_ADD_FRAME(Err_get_error());
        return false;
    }

    OutStream *out = Folder_Open_Out(folder, path);
    if (out == NULL) {
        ERR_ADD_FRAME(Err_get_error());
        DECREF(encoded);
        return false;
    }

    // Write the whole encoded buffer in one go.
    size_t num_bytes = CB_Get_Size(encoded);
    OutStream_Write_Bytes(out, CB_Get_Ptr8(encoded), num_bytes);
    OutStream_Close(out);

    DECREF(out);
    DECREF(encoded);
    return true;
}
// Check InStream_Buf / InStream_Advance_Buf against a RAMFile holding
// (IO_STREAM_BUF_SIZE * 2 + 5) bytes of 'a'.
static void test_Buf(TestBatchRunner *runner) {
    RAMFile   *ram_file  = RAMFile_new(NULL, false);
    OutStream *out       = OutStream_open((Obj*)ram_file);
    size_t     num_bytes = IO_STREAM_BUF_SIZE * 2 + 5;

    // Populate the file.
    for (uint32_t written = 0; written < num_bytes; written++) {
        OutStream_Write_U8(out, 'a');
    }
    OutStream_Close(out);

    InStream *in = InStream_open((Obj*)ram_file);
    InStreamIVARS *const ivars = InStream_IVARS(in);

    char *cursor = InStream_Buf(in, 5);
    TEST_INT_EQ(runner, ivars->limit - cursor, IO_STREAM_BUF_SIZE,
                "Small request bumped up");

    cursor += IO_STREAM_BUF_SIZE - 10; // 10 bytes left in buffer.
    InStream_Advance_Buf(in, cursor);

    cursor = InStream_Buf(in, 10);
    TEST_INT_EQ(runner, ivars->limit - cursor, 10,
                "Exact request doesn't trigger refill");

    cursor = InStream_Buf(in, 11);
    TEST_INT_EQ(runner, ivars->limit - cursor, IO_STREAM_BUF_SIZE,
                "Requesting over limit triggers refill");

    // Ask for far more than the file holds.
    int64_t remaining = InStream_Length(in) - InStream_Tell(in);
    char   *oversized = InStream_Buf(in, 100000);
    int64_t available = PTR_TO_I64(ivars->limit) - PTR_TO_I64(oversized);
    TEST_TRUE(runner, available == remaining,
              "Requests greater than file size get pared down");

    DECREF(in);
    DECREF(out);
    DECREF(ram_file);
}
// Exercise CFReader_Local_Open_FileHandle in three situations: a file
// written through the reader itself, a file which does not exist, and a
// virtual file.
static void test_Local_Open_FileHandle(TestBatchRunner *runner) {
    Folder *backing = S_folder_with_contents();
    CompoundFileReader *reader = CFReader_open(backing);
    FileHandle *handle;

    // Real file, created through the reader.
    OutStream *out = CFReader_Open_Out(reader, baz);
    OutStream_Write_Bytes(out, "baz", 3);
    OutStream_Close(out);
    DECREF(out);
    handle = CFReader_Local_Open_FileHandle(reader, baz, FH_READ_ONLY);
    TEST_TRUE(runner, handle != NULL,
              "Local_Open_FileHandle pass-through for real file");
    DECREF(handle);

    // Non-existent file: clear the global error first so we can tell that
    // the call sets it.
    Err_set_error(NULL);
    handle = CFReader_Local_Open_FileHandle(reader, stuff, FH_READ_ONLY);
    TEST_TRUE(runner, handle == NULL,
              "Local_Open_FileHandle for non-existent file returns NULL");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Local_Open_FileHandle for non-existent file sets global error");

    // Virtual file: same expectations as for a missing file.
    Err_set_error(NULL);
    handle = CFReader_Local_Open_FileHandle(reader, foo, FH_READ_ONLY);
    TEST_TRUE(runner, handle == NULL,
              "Local_Open_FileHandle for virtual file returns NULL");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Local_Open_FileHandle for virtual file sets global error");

    DECREF(reader);
    DECREF(backing);
}
// Merge every file in the folder whose name does not end in ".json" into a
// single "cf.dat" file, write "cfmeta.json" recording the offset and length
// of each merged file, and finally delete the files which were merged.
//
// Throws if "cf.dat" or any input file cannot be opened, if the metadata
// cannot be written or renamed into place, or if a merged file cannot be
// deleted.
static void S_do_consolidate(CompoundFileWriter *self,
                             CompoundFileWriterIVARS *ivars) {
    UNUSED_VAR(self);
    Folder    *folder    = ivars->folder;
    Hash      *metadata  = Hash_new(0);
    Hash      *sub_files = Hash_new(0);
    Vector    *files     = Folder_List(folder, NULL);
    Vector    *merged    = Vec_new(Vec_Get_Size(files));
    String    *cf_file   = (String*)SSTR_WRAP_UTF8("cf.dat", 6);
    OutStream *outstream = Folder_Open_Out(folder, (String*)cf_file);
    bool rename_success;

    if (!outstream) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Start metadata.
    Hash_Store_Utf8(metadata, "files", 5, INCREF(sub_files));
    Hash_Store_Utf8(metadata, "format", 6,
                    (Obj*)Str_newf("%i32", CFWriter_current_file_format));

    Vec_Sort(files);
    for (uint32_t i = 0, max = Vec_Get_Size(files); i < max; i++) {
        String *infilename = (String*)Vec_Fetch(files, i);

        if (!Str_Ends_With_Utf8(infilename, ".json", 5)) {
            InStream *instream = Folder_Open_In(folder, infilename);
            int64_t offset, len;

            if (!instream) {
                RETHROW(INCREF(Err_get_error()));
            }

            // Allocate only once the input is known to be open, so that
            // nothing extra is leaked on the throw path above.
            Hash *file_data = Hash_new(2);

            // Absorb the file.
            offset = OutStream_Tell(outstream);
            OutStream_Absorb(outstream, instream);
            len = OutStream_Tell(outstream) - offset;

            // Record offset and length.
            Hash_Store_Utf8(file_data, "offset", 6,
                            (Obj*)Str_newf("%i64", offset));
            Hash_Store_Utf8(file_data, "length", 6,
                            (Obj*)Str_newf("%i64", len));
            Hash_Store(sub_files, infilename, (Obj*)file_data);
            Vec_Push(merged, INCREF(infilename));

            // Add filler NULL bytes so that every sub-file begins on a file
            // position multiple of 8.
            OutStream_Align(outstream, 8);

            InStream_Close(instream);
            DECREF(instream);
        }
    }

    // Write metadata to cfmeta file.  The result of the write must be
    // checked: if it failed but a stale temp file is lying around, the
    // rename below could succeed and install out-of-date metadata.
    // NOTE(review): assumes Json_spew_json leaves the cause in the global
    // error when it returns false -- confirm against its implementation.
    String *cfmeta_temp = (String*)SSTR_WRAP_UTF8("cfmeta.json.temp", 16);
    String *cfmeta_file = (String*)SSTR_WRAP_UTF8("cfmeta.json", 11);
    if (!Json_spew_json((Obj*)metadata, (Folder*)ivars->folder,
                        cfmeta_temp)) {
        RETHROW(INCREF(Err_get_error()));
    }
    rename_success = Folder_Rename(ivars->folder, cfmeta_temp, cfmeta_file);
    if (!rename_success) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Clean up.
    OutStream_Close(outstream);
    DECREF(outstream);
    DECREF(files);
    DECREF(metadata);
    DECREF(sub_files);

    // Remove the files which now live inside cf.dat.
    for (uint32_t i = 0, max = Vec_Get_Size(merged); i < max; i++) {
        String *merged_file = (String*)Vec_Fetch(merged, i);
        if (!Folder_Delete(folder, merged_file)) {
            // Build the message before releasing `merged`, which holds the
            // reference to `merged_file`.
            String *mess = MAKE_MESS("Can't delete '%o'", merged_file);
            DECREF(merged);
            Err_throw_mess(ERR, mess);
        }
    }
    DECREF(merged);
}
// Finish all sort-field writers: optionally flush them, close the temp
// streams, finish each field writer while recording its counts, null ords
// and ord widths, store the "sort" metadata, and delete the three temp
// files.  Returns immediately when there is no temp ord stream.
void SortWriter_finish(SortWriter *self) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    VArray *const writers = ivars->field_writers;

    // If we have no data, bail out.
    if (!ivars->temp_ord_out) {
        return;
    }

    // If we've either flushed or added segments, flush everything so that any
    // one field can use the entire margin up to mem_thresh.
    if (ivars->flush_at_finish) {
        // Iteration starts at slot 1, as in the second loop below.
        for (uint32_t slot = 1, limit = VA_Get_Size(writers);
             slot < limit;
             slot++
            ) {
            SortFieldWriter *writer
                = (SortFieldWriter*)VA_Fetch(writers, slot);
            if (writer != NULL) {
                SortFieldWriter_Flush(writer);
            }
        }
    }

    // Close down temp streams.
    OutStream_Close(ivars->temp_ord_out);
    OutStream_Close(ivars->temp_ix_out);
    OutStream_Close(ivars->temp_dat_out);

    // Take each field writer out of the array, finish it, and record its
    // statistics keyed by field name.
    for (uint32_t slot = 1, limit = VA_Get_Size(writers);
         slot < limit;
         slot++
        ) {
        SortFieldWriter *writer
            = (SortFieldWriter*)VA_Delete(writers, slot);
        if (writer != NULL) {
            CharBuf *field_name = Seg_Field_Name(ivars->segment, slot);
            SortFieldWriter_Flip(writer);

            int32_t num_values = SortFieldWriter_Finish(writer);
            CharBuf *count_str = CB_newf("%i32", num_values);
            Hash_Store(ivars->counts, (Obj*)field_name, (Obj*)count_str);

            // A null ord of -1 is not recorded.
            int32_t null_ord = SortFieldWriter_Get_Null_Ord(writer);
            if (null_ord != -1) {
                CharBuf *null_ord_str = CB_newf("%i32", null_ord);
                Hash_Store(ivars->null_ords, (Obj*)field_name,
                           (Obj*)null_ord_str);
            }

            int32_t ord_width = SortFieldWriter_Get_Ord_Width(writer);
            CharBuf *width_str = CB_newf("%i32", ord_width);
            Hash_Store(ivars->ord_widths, (Obj*)field_name,
                       (Obj*)width_str);
        }
        DECREF(writer);
    }
    VA_Clear(writers);

    // Store metadata.
    Seg_Store_Metadata_Str(ivars->segment, "sort", 4,
                           (Obj*)SortWriter_Metadata(self));

    // Clean up: delete the three temp files, reusing one path buffer.
    Folder  *folder   = ivars->folder;
    CharBuf *seg_name = Seg_Get_Name(ivars->segment);
    CharBuf *temp_path = CB_newf("%o/sort_ord_temp", seg_name);
    Folder_Delete(folder, temp_path);
    CB_setf(temp_path, "%o/sort_ix_temp", seg_name);
    Folder_Delete(folder, temp_path);
    CB_setf(temp_path, "%o/sort_dat_temp", seg_name);
    Folder_Delete(folder, temp_path);
    DECREF(temp_path);
}