/* Begin the lexicon for a new field: name and register the three output
 * files (.dat/.ix/.ixix) with the snapshot, open an OutStream for each,
 * and reset per-field state.  Throws if any of the three files cannot be
 * opened; note that all three opens are attempted before any check, so a
 * later stream may still be created when an earlier one failed.
 */
void
LexWriter_start_field(LexiconWriter *self, i32_t field_num) {
    CharBuf  *const seg_name = Seg_Get_Name(self->segment);
    Folder   *const folder   = self->folder;
    Snapshot *const snapshot = LexWriter_Get_Snapshot(self);

    /* Open outstreams. */
    CB_setf(self->dat_file,  "%o/lexicon-%i32.dat",  seg_name, field_num);
    CB_setf(self->ix_file,   "%o/lexicon-%i32.ix",   seg_name, field_num);
    CB_setf(self->ixix_file, "%o/lexicon-%i32.ixix", seg_name, field_num);
    /* Register the files with the snapshot before creating them, so they
     * are tracked even if a later step fails. */
    Snapshot_Add_Entry(snapshot, self->dat_file);
    Snapshot_Add_Entry(snapshot, self->ix_file);
    Snapshot_Add_Entry(snapshot, self->ixix_file);
    self->dat_out  = Folder_Open_Out(folder, self->dat_file);
    self->ix_out   = Folder_Open_Out(folder, self->ix_file);
    self->ixix_out = Folder_Open_Out(folder, self->ixix_file);
    if (!self->dat_out)  { THROW("Can't open %o", self->dat_file); }
    if (!self->ix_out)   { THROW("Can't open %o", self->ix_file); }
    if (!self->ixix_out) { THROW("Can't open %o", self->ixix_file); }

    /* Initialize count and ix_count, TermInfo and last term text. */
    self->count    = 0;
    self->ix_count = 0;
    TInfo_Reset(self->last_tinfo);
    CB_Set_Size(self->last_text, 0);
}
// Open the highlight outstreams on first use and hand back the primary
// data stream.  Subsequent calls are no-ops that return the cached
// stream.  Rethrows if either file cannot be opened.
static OutStream*
S_lazy_init(HighlightWriter *self) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);
    if (ivars->dat_out == NULL) {
        Folder *folder   = ivars->folder;
        String *seg_name = Seg_Get_Name(ivars->segment);

        // Open the ".ix" stream, then the ".dat" stream.
        String *ix_path = Str_newf("%o/highlight.ix", seg_name);
        ivars->ix_out = Folder_Open_Out(folder, ix_path);
        DECREF(ix_path);
        if (ivars->ix_out == NULL) {
            RETHROW(INCREF(Err_get_error()));
        }

        String *dat_path = Str_newf("%o/highlight.dat", seg_name);
        ivars->dat_out = Folder_Open_Out(folder, dat_path);
        DECREF(dat_path);
        if (ivars->dat_out == NULL) {
            RETHROW(INCREF(Err_get_error()));
        }

        // Go past invalid doc 0.
        OutStream_Write_I64(ivars->ix_out, 0);
    }
    return ivars->dat_out;
}
// Lazily open this segment's "highlight.ix" and "highlight.dat"
// outstreams; returns the ".dat" stream.  Rethrows on open failure.
static OutStream*
S_lazy_init(HighlightWriter *self) {
    if (self->dat_out) { return self->dat_out; }

    Folder  *folder   = self->folder;
    CharBuf *seg_name = Seg_Get_Name(self->segment);

    // Open outstreams.
    CharBuf *ix_path = CB_newf("%o/highlight.ix", seg_name);
    self->ix_out = Folder_Open_Out(folder, ix_path);
    DECREF(ix_path);
    if (!self->ix_out) { RETHROW(INCREF(Err_get_error())); }

    CharBuf *dat_path = CB_newf("%o/highlight.dat", seg_name);
    self->dat_out = Folder_Open_Out(folder, dat_path);
    DECREF(dat_path);
    if (!self->dat_out) { RETHROW(INCREF(Err_get_error())); }

    // Go past invalid doc 0.
    OutStream_Write_I64(self->ix_out, 0);

    return self->dat_out;
}
/* Begin writing the lexicon for a new field: open the .dat/.ix/.ixix
 * outstreams for `field_num`, reset counters, and install a term stepper
 * appropriate for the field's type.  Rethrows if any file can't be
 * opened.
 */
void
LexWriter_start_field(LexiconWriter *self, int32_t field_num) {
    Segment   *const segment  = LexWriter_Get_Segment(self);
    Folder    *const folder   = LexWriter_Get_Folder(self);
    Schema    *const schema   = LexWriter_Get_Schema(self);
    CharBuf   *const seg_name = Seg_Get_Name(segment);
    CharBuf   *const field    = Seg_Field_Name(segment, field_num);
    FieldType *const type     = Schema_Fetch_Type(schema, field);

    // Open outstreams.
    CB_setf(self->dat_file,  "%o/lexicon-%i32.dat",  seg_name, field_num);
    CB_setf(self->ix_file,   "%o/lexicon-%i32.ix",   seg_name, field_num);
    CB_setf(self->ixix_file, "%o/lexicon-%i32.ixix", seg_name, field_num);
    self->dat_out = Folder_Open_Out(folder, self->dat_file);
    if (!self->dat_out) { RETHROW(INCREF(Err_get_error())); }
    self->ix_out = Folder_Open_Out(folder, self->ix_file);
    if (!self->ix_out) { RETHROW(INCREF(Err_get_error())); }
    self->ixix_out = Folder_Open_Out(folder, self->ixix_file);
    if (!self->ixix_out) { RETHROW(INCREF(Err_get_error())); }

    // Initialize count and ix_count, term stepper and term info stepper.
    self->count    = 0;
    self->ix_count = 0;
    // Release the stepper left over from the previous field before
    // installing a new one -- this method runs once per field, and the
    // old unconditional assignment leaked a TermStepper on every call
    // after the first.  (DECREF on NULL is a no-op for the first field.)
    DECREF(self->term_stepper);
    self->term_stepper = FType_Make_Term_Stepper(type);
    TermStepper_Reset(self->tinfo_stepper);
}
/* Lazily create this segment's highlight outstreams, registering both
 * files with the snapshot, and return the ".dat" stream.  Throws if
 * either file can't be opened.
 */
static OutStream*
S_lazy_init(HighlightWriter *self) {
    if (!self->dat_out) {
        Segment  *segment  = self->segment;
        Folder   *folder   = self->folder;
        Snapshot *snapshot = HLWriter_Get_Snapshot(self);
        CharBuf  *seg_name = Seg_Get_Name(segment);
        CharBuf  *ix_file  = CB_newf("%o/highlight.ix", seg_name);
        CharBuf  *dat_file = CB_newf("%o/highlight.dat", seg_name);

        /* Open outstreams. */
        Snapshot_Add_Entry(snapshot, ix_file);
        Snapshot_Add_Entry(snapshot, dat_file);
        self->ix_out  = Folder_Open_Out(folder, ix_file);
        self->dat_out = Folder_Open_Out(folder, dat_file);
        /* On failure, release the sibling filename before throwing --
         * previously both DECREFs sat after the checks, so THROW leaked
         * the other buffer.  (The failing file's name is still needed to
         * format the error message.) */
        if (!self->ix_out) {
            DECREF(dat_file);
            THROW("Can't open %o", ix_file);
        }
        if (!self->dat_out) {
            DECREF(ix_file);
            THROW("Can't open %o", dat_file);
        }
        DECREF(ix_file);
        DECREF(dat_file);

        /* Go past invalid doc 0. */
        OutStream_Write_U64(self->ix_out, 0);
    }
    return self->dat_out;
}
/* Flush deletions: for every segment whose deletions changed, write its
 * BitVector of deleted doc ids to a per-segment deletions file, then
 * record this writer's metadata on the segment.  Rethrows if an output
 * file can't be opened.
 */
void
DefDelWriter_Finish_IMP(DefaultDeletionsWriter *self) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Folder *const folder = ivars->folder;

    for (uint32_t i = 0, max = VA_Get_Size(ivars->seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(ivars->seg_readers, i);
        /* Only segments flagged as updated get a (re)written file. */
        if (ivars->updated[i]) {
            BitVector *deldocs = (BitVector*)VA_Fetch(ivars->bit_vecs, i);
            int32_t doc_max = SegReader_Doc_Max(seg_reader);
            /* doc_max + 1 bit positions (ids 0 through doc_max), rounded
             * up to a whole number of bytes; new_max is the highest bit
             * index that fits in those bytes. */
            double used = (doc_max + 1) / 8.0;
            uint32_t byte_size = (uint32_t)ceil(used);
            uint32_t new_max = byte_size * 8 - 1;
            String *filename = S_del_filename(self, seg_reader);
            OutStream *outstream = Folder_Open_Out(folder, filename);
            if (!outstream) { RETHROW(INCREF(Err_get_error())); }

            // Ensure that we have 1 bit for each doc in segment.
            BitVec_Grow(deldocs, new_max);

            // Write deletions data and clean up.
            OutStream_Write_Bytes(outstream,
                                  (char*)BitVec_Get_Raw_Bits(deldocs),
                                  byte_size);
            OutStream_Close(outstream);
            DECREF(outstream);
            DECREF(filename);
        }
    }

    Seg_Store_Metadata_Utf8(ivars->segment, "deletions", 9,
                            (Obj*)DefDelWriter_Metadata(self));
}
// Return the SortFieldWriter for `field_num`, creating it on demand.
// The shared ord/ix/dat temp outstreams are opened the first time any
// field writer is needed; rethrows if an open fails.
static SortFieldWriter*
S_lazy_init_field_writer(SortWriter *self, int32_t field_num) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    SortFieldWriter *writer
        = (SortFieldWriter*)VA_Fetch(ivars->field_writers, field_num);
    if (writer) { return writer; }

    // Open temp files.
    if (!ivars->temp_ord_out) {
        Folder  *folder   = ivars->folder;
        CharBuf *seg_name = Seg_Get_Name(ivars->segment);
        // One path buffer, re-targeted for each of the three temp files.
        CharBuf *path     = CB_newf("%o/sort_ord_temp", seg_name);

        ivars->temp_ord_out = Folder_Open_Out(folder, path);
        if (!ivars->temp_ord_out) {
            DECREF(path);
            RETHROW(INCREF(Err_get_error()));
        }

        CB_setf(path, "%o/sort_ix_temp", seg_name);
        ivars->temp_ix_out = Folder_Open_Out(folder, path);
        if (!ivars->temp_ix_out) {
            DECREF(path);
            RETHROW(INCREF(Err_get_error()));
        }

        CB_setf(path, "%o/sort_dat_temp", seg_name);
        ivars->temp_dat_out = Folder_Open_Out(folder, path);
        if (!ivars->temp_dat_out) {
            DECREF(path);
            RETHROW(INCREF(Err_get_error()));
        }

        DECREF(path);
    }

    CharBuf *field = Seg_Field_Name(ivars->segment, field_num);
    writer = SortFieldWriter_new(ivars->schema, ivars->snapshot,
                                 ivars->segment, ivars->polyreader, field,
                                 ivars->mem_pool, ivars->mem_thresh,
                                 ivars->temp_ord_out, ivars->temp_ix_out,
                                 ivars->temp_dat_out);
    VA_Store(ivars->field_writers, field_num, (Obj*)writer);
    return writer;
}
/* Fetch the cached SortFieldWriter for `field_num`, creating it on first
 * use.  The ord/ix/dat temp outstreams are shared across all fields and
 * opened the first time any field writer is created; rethrows if an open
 * fails.  NOTE(review): no DECREF on the newly created writer here --
 * presumably Vec_Store takes over the reference and the returned pointer
 * is borrowed; confirm against Vector's ownership convention.
 */
static SortFieldWriter*
S_lazy_init_field_writer(SortWriter *self, int32_t field_num) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    SortFieldWriter *field_writer
        = (SortFieldWriter*)Vec_Fetch(ivars->field_writers, (size_t)field_num);
    if (!field_writer) {

        // Open temp files.
        if (!ivars->temp_ord_out) {
            Folder *folder = ivars->folder;
            String *seg_name = Seg_Get_Name(ivars->segment);

            String *ord_path = Str_newf("%o/sort_ord_temp", seg_name);
            ivars->temp_ord_out = Folder_Open_Out(folder, ord_path);
            DECREF(ord_path);
            if (!ivars->temp_ord_out) {
                RETHROW(INCREF(Err_get_error()));
            }

            String *ix_path = Str_newf("%o/sort_ix_temp", seg_name);
            ivars->temp_ix_out = Folder_Open_Out(folder, ix_path);
            DECREF(ix_path);
            if (!ivars->temp_ix_out) {
                RETHROW(INCREF(Err_get_error()));
            }

            String *dat_path = Str_newf("%o/sort_dat_temp", seg_name);
            ivars->temp_dat_out = Folder_Open_Out(folder, dat_path);
            DECREF(dat_path);
            if (!ivars->temp_dat_out) {
                RETHROW(INCREF(Err_get_error()));
            }
        }

        String *field = Seg_Field_Name(ivars->segment, field_num);
        field_writer
            = SortFieldWriter_new(ivars->schema, ivars->snapshot,
                                  ivars->segment, ivars->polyreader, field,
                                  ivars->counter, ivars->mem_thresh,
                                  ivars->temp_ord_out, ivars->temp_ix_out,
                                  ivars->temp_dat_out);
        Vec_Store(ivars->field_writers, (size_t)field_num, (Obj*)field_writer);
    }
    return field_writer;
}
// Exercise Folder_Open_Out: it must succeed for fresh and nested paths,
// and fail -- setting Err_error -- for an existing file, an existing
// directory, and a path whose parent directory doesn't exist.
static void
test_Open_Out(TestBatch *batch) {
    Folder *folder = (Folder*)RAMFolder_new(NULL);
    OutStream *fh;

    Folder_MkDir(folder, &foo);

    fh = Folder_Open_Out(folder, &boffo);
    TEST_TRUE(batch, fh && OutStream_Is_A(fh, OUTSTREAM), "Open_Out");
    DECREF(fh);

    fh = Folder_Open_Out(folder, &foo_boffo);
    TEST_TRUE(batch, fh && OutStream_Is_A(fh, OUTSTREAM),
              "Open_Out for nested file");
    DECREF(fh);

    Err_set_error(NULL);
    fh = Folder_Open_Out(folder, &boffo);
    TEST_TRUE(batch, fh == NULL,
              "Open_OutStream on existing file fails");
    TEST_TRUE(batch, Err_get_error() != NULL,
              "Open_Out on existing file sets Err_error");

    Err_set_error(NULL);
    fh = Folder_Open_Out(folder, &foo);
    TEST_TRUE(batch, fh == NULL,
              "Open_OutStream on existing dir path fails");
    TEST_TRUE(batch, Err_get_error() != NULL,
              "Open_Out on existing dir name sets Err_error");

    Err_set_error(NULL);
    fh = Folder_Open_Out(folder, &foo_bar_baz_boffo);
    TEST_TRUE(batch, fh == NULL,
              "Open_Out for entry within non-existent dir fails");
    TEST_TRUE(batch, Err_get_error() != NULL,
              "Open_Out for entry within non-existent dir sets Err_error");

    DECREF(folder);
}
// Flush this field's sort cache to its final "sort-<field_num>" files
// (.ord always, .ix only for variable-width data, .dat always) and
// return the cardinality reported by S_write_files.  Returns 0 without
// touching the filesystem when there is no buffered data.
int32_t
SortFieldWriter_Finish_IMP(SortFieldWriter *self) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);

    // Bail if there's no data.
    if (!SortFieldWriter_Peek(self)) { return 0; }

    int32_t field_num = ivars->field_num;
    Folder *folder    = PolyReader_Get_Folder(ivars->polyreader);
    String *seg_name  = Seg_Get_Name(ivars->segment);

    // Open streams.
    String *ord_name = Str_newf("%o/sort-%i32.ord", seg_name, field_num);
    OutStream *ords  = Folder_Open_Out(folder, ord_name);
    DECREF(ord_name);
    if (ords == NULL) { RETHROW(INCREF(Err_get_error())); }

    OutStream *offsets = NULL;
    if (ivars->var_width) {
        String *ix_name = Str_newf("%o/sort-%i32.ix", seg_name, field_num);
        offsets = Folder_Open_Out(folder, ix_name);
        DECREF(ix_name);
        if (offsets == NULL) { RETHROW(INCREF(Err_get_error())); }
    }

    String *dat_name = Str_newf("%o/sort-%i32.dat", seg_name, field_num);
    OutStream *data  = Folder_Open_Out(folder, dat_name);
    DECREF(dat_name);
    if (data == NULL) { RETHROW(INCREF(Err_get_error())); }

    int32_t card = S_write_files(self, ords, offsets, data);

    // Close streams.
    OutStream_Close(ords);
    if (offsets) { OutStream_Close(offsets); }
    OutStream_Close(data);
    DECREF(data);
    DECREF(offsets);
    DECREF(ords);

    return card;
}
// Open "documents.ix" and "documents.dat" on first call; later calls
// just hand back the cached data outstream.  Rethrows on open failure.
static OutStream*
S_lazy_init(DocWriter *self) {
    if (self->dat_out) { return self->dat_out; }

    Folder  *folder   = self->folder;
    CharBuf *seg_name = Seg_Get_Name(self->segment);

    // Get streams.
    CharBuf *ix_name = CB_newf("%o/documents.ix", seg_name);
    self->ix_out = Folder_Open_Out(folder, ix_name);
    DECREF(ix_name);
    if (!self->ix_out) { RETHROW(INCREF(Err_get_error())); }

    CharBuf *dat_name = CB_newf("%o/documents.dat", seg_name);
    self->dat_out = Folder_Open_Out(folder, dat_name);
    DECREF(dat_name);
    if (!self->dat_out) { RETHROW(INCREF(Err_get_error())); }

    // Go past non-doc #0.
    OutStream_Write_I64(self->ix_out, 0);

    return self->dat_out;
}
// Lazily create the document-storage outstreams ("documents.ix" and
// "documents.dat") and return the ".dat" stream.  Rethrows if either
// file cannot be opened.
static OutStream*
S_lazy_init(DocWriter *self) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    if (ivars->dat_out == NULL) {
        Folder *folder   = ivars->folder;
        String *seg_name = Seg_Get_Name(ivars->segment);

        // Get streams.
        String *ix_name = Str_newf("%o/documents.ix", seg_name);
        ivars->ix_out = Folder_Open_Out(folder, ix_name);
        DECREF(ix_name);
        if (ivars->ix_out == NULL) {
            RETHROW(INCREF(Err_get_error()));
        }

        String *dat_name = Str_newf("%o/documents.dat", seg_name);
        ivars->dat_out = Folder_Open_Out(folder, dat_name);
        DECREF(dat_name);
        if (ivars->dat_out == NULL) {
            RETHROW(INCREF(Err_get_error()));
        }

        // Go past non-doc #0.
        OutStream_Write_I64(ivars->ix_out, 0);
    }
    return ivars->dat_out;
}
/* Initialize a MatchPostingWriter: run the parent PostingWriter init,
 * then open the "postings-<field_num>.dat" outstream for this field.
 * Rethrows if the file cannot be opened.  Returns `self`.
 */
MatchPostingWriter*
MatchPostWriter_init(MatchPostingWriter *self, Schema *schema,
                     Snapshot *snapshot, Segment *segment,
                     PolyReader *polyreader, int32_t field_num) {
    Folder  *folder   = PolyReader_Get_Folder(polyreader);
    CharBuf *filename = CB_newf("%o/postings-%i32.dat", Seg_Get_Name(segment),
                                field_num);
    PostWriter_init((PostingWriter*)self, schema, snapshot, segment,
                    polyreader, field_num);
    self->outstream = Folder_Open_Out(folder, filename);
    // Release the filename before the error check: previously the DECREF
    // came after the RETHROW, so `filename` leaked whenever the open
    // failed.  This matches the DECREF-then-check pattern used by the
    // other writers.
    DECREF(filename);
    if (!self->outstream) { RETHROW(INCREF(Err_get_error())); }
    return self;
}
// Serialize `dump` as JSON and write it to `path` inside `folder`.
// Returns true on success.  On failure (serialization or open error),
// adds a frame to the pending Err and returns false.
bool_t
Json_spew_json(Obj *dump, Folder *folder, const CharBuf *path) {
    CharBuf *json = Json_to_json(dump);
    if (json == NULL) {
        ERR_ADD_FRAME(Err_get_error());
        return false;
    }

    OutStream *outstream = Folder_Open_Out(folder, path);
    if (outstream == NULL) {
        ERR_ADD_FRAME(Err_get_error());
        DECREF(json);
        return false;
    }

    OutStream_Write_Bytes(outstream, CB_Get_Ptr8(json), CB_Get_Size(json));
    OutStream_Close(outstream);
    DECREF(outstream);
    DECREF(json);
    return true;
}
/* Attempt to acquire the lock.  Writes pid/host/name as JSON to a
 * temporary file, then hard-links it to the canonical lock path; the
 * link either succeeds or fails as a unit, so the lock file never
 * appears with partial contents.  Returns true on success; on failure
 * returns false with a LockErr recorded via Err_set_error.  Throws only
 * if the temporary file cannot be deleted afterwards.
 */
bool
LFLock_Request_IMP(LockFileLock *self) {
    LockFileLockIVARS *const ivars = LFLock_IVARS(self);
    bool success = false;

    /* Fail fast if the lock file already exists. */
    if (Folder_Exists(ivars->folder, ivars->lock_path)) {
        Err_set_error((Err*)LockErr_new(Str_newf(
                          "Can't obtain lock: '%o' exists",
                          ivars->lock_path)));
        return false;
    }

    // Create the "locks" subdirectory if necessary.
    String *lock_dir_name = (String*)SSTR_WRAP_UTF8("locks", 5);
    if (!Folder_Exists(ivars->folder, lock_dir_name)) {
        if (!Folder_MkDir(ivars->folder, lock_dir_name)) {
            Err *mkdir_err = (Err*)CERTIFY(Err_get_error(), ERR);
            LockErr *err = LockErr_new(Str_newf(
                               "Can't create 'locks' directory: %o",
                               Err_Get_Mess(mkdir_err)));
            // Maybe our attempt failed because another process succeeded.
            if (Folder_Find_Folder(ivars->folder, lock_dir_name)) {
                DECREF(err);
            }
            else {
                // Nope, everything failed, so bail out.
                Err_set_error((Err*)err);
                return false;
            }
        }
    }

    // Prepare to write pid, lock name, and host to the lock file as JSON.
    Hash *file_data = Hash_new(3);
    Hash_Store_Utf8(file_data, "pid", 3,
                    (Obj*)Str_newf("%i32", (int32_t)PID_getpid()));
    Hash_Store_Utf8(file_data, "host", 4, INCREF(ivars->host));
    Hash_Store_Utf8(file_data, "name", 4, INCREF(ivars->name));
    String *json = Json_to_json((Obj*)file_data);
    DECREF(file_data);

    // Write to a temporary file, then use the creation of a hard link to
    // ensure atomic but non-destructive creation of the lockfile with its
    // complete contents.
    OutStream *outstream = Folder_Open_Out(ivars->folder, ivars->link_path);
    if (!outstream) {
        ERR_ADD_FRAME(Err_get_error());
        DECREF(json);
        return false;
    }

    struct lockfile_context context;
    context.outstream = outstream;
    context.json = json;
    /* Trap any error thrown while writing so cleanup below still runs. */
    Err *json_error = Err_trap(S_write_lockfile_json, &context);
    bool wrote_json = !json_error;
    DECREF(outstream);
    DECREF(json);
    if (wrote_json) {
        /* Hard-link the temp file to the lock path; failure means another
         * holder (or filesystem error). */
        success = Folder_Hard_Link(ivars->folder, ivars->link_path,
                                   ivars->lock_path);
        if (!success) {
            Err *hard_link_err = (Err*)CERTIFY(Err_get_error(), ERR);
            Err_set_error((Err*)LockErr_new(Str_newf(
                              "Failed to obtain lock at '%o': %o",
                              ivars->lock_path,
                              Err_Get_Mess(hard_link_err))));
        }
    }
    else {
        Err_set_error((Err*)LockErr_new(Str_newf(
                          "Failed to obtain lock at '%o': %o",
                          ivars->lock_path,
                          Err_Get_Mess(json_error))));
        DECREF(json_error);
    }

    // Verify that our temporary file got zapped.
    bool deletion_failed = !Folder_Delete(ivars->folder, ivars->link_path);
    if (deletion_failed) {
        String *mess = MAKE_MESS("Failed to delete '%o'", ivars->link_path);
        Err_throw_mess(ERR, mess);
    }

    return success;
}
/* Merge every non-JSON file in the folder into a single "cf.dat"
 * compound file, record each sub-file's offset and length in
 * "cfmeta.json" (written via a temp file and then renamed into place),
 * and finally delete the originals.  Throws on any I/O failure.
 */
static void
S_do_consolidate(CompoundFileWriter *self, CompoundFileWriterIVARS *ivars) {
    UNUSED_VAR(self);
    Folder *folder = ivars->folder;
    Hash   *metadata  = Hash_new(0);
    Hash   *sub_files = Hash_new(0);
    Vector *files  = Folder_List(folder, NULL);
    /* `merged` tracks which files were absorbed, so only those get
     * deleted at the end. */
    Vector *merged = Vec_new(Vec_Get_Size(files));
    String *cf_file = (String*)SSTR_WRAP_UTF8("cf.dat", 6);
    OutStream *outstream = Folder_Open_Out(folder, (String*)cf_file);
    bool rename_success;

    if (!outstream) { RETHROW(INCREF(Err_get_error())); }

    // Start metadata.
    Hash_Store_Utf8(metadata, "files", 5, INCREF(sub_files));
    Hash_Store_Utf8(metadata, "format", 6,
                    (Obj*)Str_newf("%i32", CFWriter_current_file_format));

    /* Sort for a deterministic sub-file layout. */
    Vec_Sort(files);

    for (uint32_t i = 0, max = Vec_Get_Size(files); i < max; i++) {
        String *infilename = (String*)Vec_Fetch(files, i);

        /* JSON files are left outside the compound file. */
        if (!Str_Ends_With_Utf8(infilename, ".json", 5)) {
            InStream *instream = Folder_Open_In(folder, infilename);
            Hash *file_data = Hash_new(2);
            int64_t offset, len;

            if (!instream) { RETHROW(INCREF(Err_get_error())); }

            // Absorb the file.
            offset = OutStream_Tell(outstream);
            OutStream_Absorb(outstream, instream);
            len = OutStream_Tell(outstream) - offset;

            // Record offset and length.
            Hash_Store_Utf8(file_data, "offset", 6,
                            (Obj*)Str_newf("%i64", offset));
            Hash_Store_Utf8(file_data, "length", 6,
                            (Obj*)Str_newf("%i64", len));
            Hash_Store(sub_files, infilename, (Obj*)file_data);
            Vec_Push(merged, INCREF(infilename));

            // Add filler NULL bytes so that every sub-file begins on a file
            // position multiple of 8.
            OutStream_Align(outstream, 8);

            InStream_Close(instream);
            DECREF(instream);
        }
    }

    // Write metadata to cfmeta file.
    String *cfmeta_temp = (String*)SSTR_WRAP_UTF8("cfmeta.json.temp", 16);
    String *cfmeta_file = (String*)SSTR_WRAP_UTF8("cfmeta.json", 11);
    Json_spew_json((Obj*)metadata, (Folder*)ivars->folder, cfmeta_temp);
    /* NOTE(review): the temp-then-rename dance presumably keeps readers
     * from ever seeing a partially written cfmeta.json -- confirm
     * Folder_Rename's atomicity guarantees. */
    rename_success = Folder_Rename(ivars->folder, cfmeta_temp, cfmeta_file);
    if (!rename_success) { RETHROW(INCREF(Err_get_error())); }

    // Clean up.
    OutStream_Close(outstream);
    DECREF(outstream);
    DECREF(files);
    DECREF(metadata);
    /* Retained alternative: deleting via a HashIterator over `sub_files`
       instead of iterating `merged`.
    HashIterator *iter = HashIter_new(sub_files);
    while (HashIter_Next(iter)) {
        String *merged_file = HashIter_Get_Key(iter);
        if (!Folder_Delete(folder, merged_file)) {
            String *mess = MAKE_MESS("Can't delete '%o'", merged_file);
            DECREF(sub_files);
            Err_throw_mess(ERR, mess);
        }
    }
    DECREF(iter);
    */
    DECREF(sub_files);
    for (uint32_t i = 0, max = Vec_Get_Size(merged); i < max; i++) {
        String *merged_file = (String*)Vec_Fetch(merged, i);
        if (!Folder_Delete(folder, merged_file)) {
            String *mess = MAKE_MESS("Can't delete '%o'", merged_file);
            DECREF(merged);
            Err_throw_mess(ERR, mess);
        }
    }
    DECREF(merged);
}