static void
test_Local_MkDir(TestBatchRunner *runner) {
    // Exercise Local_MkDir: the happy path, collision with an existing
    // directory, and collision with an existing file.
    RAMFolder *folder = RAMFolder_new(NULL);
    bool result;

    result = RAMFolder_Local_MkDir(folder, foo);
    TEST_TRUE(runner, result, "Local_MkDir succeeds and returns true");

    // Collision with an existing directory.
    Err_set_error(NULL);
    result = RAMFolder_Local_MkDir(folder, foo);
    TEST_FALSE(runner, result,
               "Local_MkDir returns false when a dir already exists");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Local_MkDir sets Err_error when a dir already exists");
    TEST_TRUE(runner, RAMFolder_Exists(folder, foo),
              "Existing dir untouched after failed Local_MkDir");

    // Collision with an existing file.
    FileHandle *fh = RAMFolder_Open_FileHandle(folder, boffo,
                                               FH_CREATE | FH_WRITE_ONLY);
    DECREF(fh);
    Err_set_error(NULL);
    // FIX: target the existing *file* `boffo`, not the directory `foo` --
    // the original repeated the dir-collision case, so the file-collision
    // path (which the following assertions describe) was never exercised.
    result = RAMFolder_Local_MkDir(folder, boffo);
    TEST_FALSE(runner, result,
               "Local_MkDir returns false when a file already exists");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Local_MkDir sets Err_error when a file already exists");
    TEST_TRUE(runner,
              RAMFolder_Exists(folder, boffo)
              && !RAMFolder_Local_Is_Directory(folder, boffo),
              "Existing file untouched after failed Local_MkDir");

    DECREF(folder);
}
static void
test_Open_FileHandle(TestBatch *batch) {
    // Verify Open_FileHandle for top-level and nested paths, plus the two
    // failure modes: the target is a directory, or a parent dir is missing.
    Folder *folder = (Folder*)RAMFolder_new(NULL);
    Folder_MkDir(folder, &foo);

    FileHandle *fh = Folder_Open_FileHandle(folder, &boffo,
                                            FH_CREATE | FH_WRITE_ONLY);
    TEST_TRUE(batch, fh && FH_Is_A(fh, FILEHANDLE), "Open_FileHandle");
    DECREF(fh);

    fh = Folder_Open_FileHandle(folder, &foo_boffo,
                                FH_CREATE | FH_WRITE_ONLY);
    TEST_TRUE(batch, fh && FH_Is_A(fh, FILEHANDLE),
              "Open_FileHandle for nested file");
    DECREF(fh);

    // Opening a path which names an existing directory must fail.
    Err_set_error(NULL);
    fh = Folder_Open_FileHandle(folder, &foo, FH_CREATE | FH_WRITE_ONLY);
    TEST_TRUE(batch, fh == NULL, "Open_FileHandle on existing dir path fails");
    TEST_TRUE(batch, Err_get_error() != NULL,
              "Open_FileHandle on existing dir name sets Err_error");

    // Opening inside a directory which does not exist must fail.
    Err_set_error(NULL);
    fh = Folder_Open_FileHandle(folder, &foo_bar_baz_boffo,
                                FH_CREATE | FH_WRITE_ONLY);
    TEST_TRUE(batch, fh == NULL,
              "Open_FileHandle for entry within non-existent dir fails");
    TEST_TRUE(batch, Err_get_error() != NULL,
              "Open_FileHandle for entry within non-existent dir sets Err_error");

    DECREF(folder);
}
static OutStream*
S_lazy_init(HighlightWriter *self) {
    // Defer creation of the highlight output streams until first use.
    if (!self->dat_out) {
        Folder  *folder   = self->folder;
        CharBuf *seg_name = Seg_Get_Name(self->segment);

        // Open the ".ix" stream.
        CharBuf *ix_file = CB_newf("%o/highlight.ix", seg_name);
        self->ix_out = Folder_Open_Out(folder, ix_file);
        DECREF(ix_file);
        if (!self->ix_out) {
            RETHROW(INCREF(Err_get_error()));
        }

        // Open the ".dat" stream.
        CharBuf *dat_file = CB_newf("%o/highlight.dat", seg_name);
        self->dat_out = Folder_Open_Out(folder, dat_file);
        DECREF(dat_file);
        if (!self->dat_out) {
            RETHROW(INCREF(Err_get_error()));
        }

        // Go past invalid doc 0.
        OutStream_Write_I64(self->ix_out, 0);
    }

    return self->dat_out;
}
static void
test_Open_Dir(TestBatch *batch) {
    Folder *folder = (Folder*)RAMFolder_new(NULL);
    Folder_MkDir(folder, &foo);
    Folder_MkDir(folder, &foo_bar);

    // Opening existing directories, top-level and nested, succeeds.
    DirHandle *dh = Folder_Open_Dir(folder, &foo);
    TEST_TRUE(batch, dh && DH_Is_A(dh, DIRHANDLE), "Open_Dir");
    DECREF(dh);
    dh = Folder_Open_Dir(folder, &foo_bar);
    TEST_TRUE(batch, dh && DH_Is_A(dh, DIRHANDLE), "Open_Dir nested dir");
    DECREF(dh);

    // Opening missing directories fails and sets the global error.
    Err_set_error(NULL);
    dh = Folder_Open_Dir(folder, &bar);
    TEST_TRUE(batch, dh == NULL, "Open_Dir on non-existent entry fails");
    TEST_TRUE(batch, Err_get_error() != NULL,
              "Open_Dir on non-existent entry sets Err_error");
    Err_set_error(NULL);
    dh = Folder_Open_Dir(folder, &foo_foo);
    TEST_TRUE(batch, dh == NULL,
              "Open_Dir on non-existent nested entry fails");
    TEST_TRUE(batch, Err_get_error() != NULL,
              "Open_Dir on non-existent nested entry sets Err_error");

    DECREF(folder);
}
static OutStream*
S_lazy_init(HighlightWriter *self) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);

    // Create the highlight output streams on first use only.
    if (!ivars->dat_out) {
        Folder *folder   = ivars->folder;
        String *seg_name = Seg_Get_Name(ivars->segment);

        {   // Open the ".ix" stream.
            String *ix_file = Str_newf("%o/highlight.ix", seg_name);
            ivars->ix_out = Folder_Open_Out(folder, ix_file);
            DECREF(ix_file);
            if (!ivars->ix_out) { RETHROW(INCREF(Err_get_error())); }
        }

        {   // Open the ".dat" stream.
            String *dat_file = Str_newf("%o/highlight.dat", seg_name);
            ivars->dat_out = Folder_Open_Out(folder, dat_file);
            DECREF(dat_file);
            if (!ivars->dat_out) { RETHROW(INCREF(Err_get_error())); }
        }

        // Go past invalid doc 0.
        OutStream_Write_I64(ivars->ix_out, 0);
    }

    return ivars->dat_out;
}
// Begin writing the lexicon for one field: open the three per-field output
// files and reset the counters and steppers.
void
LexWriter_start_field(LexiconWriter *self, int32_t field_num) {
    Segment   *const segment  = LexWriter_Get_Segment(self);
    Folder    *const folder   = LexWriter_Get_Folder(self);
    Schema    *const schema   = LexWriter_Get_Schema(self);
    CharBuf   *const seg_name = Seg_Get_Name(segment);
    CharBuf   *const field    = Seg_Field_Name(segment, field_num);
    FieldType *const type     = Schema_Fetch_Type(schema, field);

    // Open outstreams.
    CB_setf(self->dat_file, "%o/lexicon-%i32.dat", seg_name, field_num);
    CB_setf(self->ix_file, "%o/lexicon-%i32.ix", seg_name, field_num);
    CB_setf(self->ixix_file, "%o/lexicon-%i32.ixix", seg_name, field_num);
    self->dat_out = Folder_Open_Out(folder, self->dat_file);
    if (!self->dat_out) { RETHROW(INCREF(Err_get_error())); }
    self->ix_out = Folder_Open_Out(folder, self->ix_file);
    if (!self->ix_out) { RETHROW(INCREF(Err_get_error())); }
    self->ixix_out = Folder_Open_Out(folder, self->ixix_file);
    if (!self->ixix_out) { RETHROW(INCREF(Err_get_error())); }

    // Initialize count and ix_count, term stepper and term info stepper.
    self->count = 0;
    self->ix_count = 0;
    // NOTE(review): the previous term_stepper is overwritten without a
    // DECREF here -- presumably released elsewhere (e.g. when the field is
    // finished); verify to rule out a leak on repeated start_field calls.
    self->term_stepper = FType_Make_Term_Stepper(type);
    TermStepper_Reset(self->tinfo_stepper);
}
static void
test_Read_Write(TestBatchRunner *runner) {
    const char *foo = "foo";
    const char *bar = "bar";
    char scratch[12];
    char *buf = scratch;
    String *test_filename = SSTR_WRAP_C("_fstest");

    S_remove(test_filename);

    // Write "foo" then "bar", checking Length() as we go.
    FSFileHandle *fh = FSFH_open(test_filename,
                                 FH_CREATE | FH_WRITE_ONLY | FH_EXCLUSIVE);
    TEST_TRUE(runner, FSFH_Length(fh) == INT64_C(0), "Length initially 0");
    TEST_TRUE(runner, FSFH_Write(fh, foo, 3), "Write returns success");
    TEST_TRUE(runner, FSFH_Length(fh) == INT64_C(3), "Length after Write");
    TEST_TRUE(runner, FSFH_Write(fh, bar, 3), "Write returns success");
    TEST_TRUE(runner, FSFH_Length(fh) == INT64_C(6), "Length after 2 Writes");

    // Reads must be rejected while the handle is write-only.
    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Read(fh, buf, 0, 2),
               "Reading from a write-only handle returns false");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Reading from a write-only handle sets error");
    if (!FSFH_Close(fh)) { RETHROW(INCREF(Err_get_error())); }
    DECREF(fh);

    // Reopen for reading and verify the content round-trips.
    Err_set_error(NULL);
    fh = FSFH_open(test_filename, FH_READ_ONLY);
    TEST_TRUE(runner, FSFH_Length(fh) == INT64_C(6), "Length on Read");
    TEST_TRUE(runner, FSFH_Read(fh, buf, 0, 6), "Read returns success");
    TEST_TRUE(runner, strncmp(buf, "foobar", 6) == 0, "Read/Write");
    TEST_TRUE(runner, FSFH_Read(fh, buf, 2, 3), "Read returns success");
    TEST_TRUE(runner, strncmp(buf, "oba", 3) == 0, "Read with offset");

    // Out-of-range reads fail and set the global error.
    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Read(fh, buf, -1, 4),
               "Read() with a negative offset returns false");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Read() with a negative offset sets error");
    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Read(fh, buf, 6, 1),
               "Read() past EOF returns false");
    TEST_TRUE(runner, Err_get_error() != NULL, "Read() past EOF sets error");

    // Writes must be rejected while the handle is read-only.
    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Write(fh, foo, 3),
               "Writing to a read-only handle returns false");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Writing to a read-only handle sets error");

    DECREF(fh);
    S_remove(test_filename);
}
// Transition the pool from collecting postings to serving them in sorted
// order, merging the in-memory buffer with any previously flushed runs.
void
PostPool_Flip_IMP(PostingPool *self) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    uint32_t num_runs = VA_Get_Size(ivars->runs);
    // Each run gets an equal slice of the memory threshold.
    uint32_t sub_thresh = num_runs > 0
                          ? ivars->mem_thresh / num_runs
                          : ivars->mem_thresh;

    if (num_runs) {
        // Runs were flushed to temp files earlier; open them for reading.
        Folder *folder = PolyReader_Get_Folder(ivars->polyreader);
        String *seg_name = Seg_Get_Name(ivars->segment);
        String *lex_temp_path = Str_newf("%o/lextemp", seg_name);
        String *post_temp_path = Str_newf("%o/ptemp", seg_name);
        ivars->lex_temp_in = Folder_Open_In(folder, lex_temp_path);
        if (!ivars->lex_temp_in) {
            RETHROW(INCREF(Err_get_error()));
        }
        ivars->post_temp_in = Folder_Open_In(folder, post_temp_path);
        if (!ivars->post_temp_in) {
            RETHROW(INCREF(Err_get_error()));
        }
        DECREF(lex_temp_path);
        DECREF(post_temp_path);
    }

    PostPool_Sort_Buffer(self);

    if (num_runs && (ivars->buf_max - ivars->buf_tick) > 0) {
        // Postings still sit in the buffer alongside flushed runs: wrap the
        // leftovers in a new in-memory run so everything merges uniformly.
        uint32_t num_items = PostPool_Buffer_Count(self);
        // Cheap imitation of flush. FIXME.
        PostingPool *run
            = PostPool_new(ivars->schema, ivars->snapshot, ivars->segment,
                           ivars->polyreader, ivars->field, ivars->lex_writer,
                           ivars->mem_pool, ivars->lex_temp_out,
                           ivars->post_temp_out, ivars->skip_out);
        PostPool_Grow_Buffer(run, num_items);
        PostingPoolIVARS *const run_ivars = PostPool_IVARS(run);
        // Copy the buffered elem pointers over to the run; the originals are
        // surrendered (buf_tick/buf_max are reset below, without DECREFs).
        memcpy(run_ivars->buffer, (ivars->buffer) + ivars->buf_tick,
               num_items * sizeof(Obj*));
        run_ivars->buf_max = num_items;
        PostPool_Add_Run(self, (SortExternal*)run);
        ivars->buf_tick = 0;
        ivars->buf_max = 0;
    }

    // Assign.
    for (uint32_t i = 0; i < num_runs; i++) {
        PostingPool *run = (PostingPool*)VA_Fetch(ivars->runs, i);
        if (run != NULL) {
            PostPool_Set_Mem_Thresh(run, sub_thresh);
            if (!PostPool_IVARS(run)->lexicon) {
                S_fresh_flip(run, ivars->lex_temp_in, ivars->post_temp_in);
            }
        }
    }

    ivars->flipped = true;
}
// Initialize a DefaultHighlightReader: run superclass init, validate the
// segment's highlight metadata, then open instreams for the highlight data
// files (if the segment has any).
DefaultHighlightReader*
DefHLReader_init(DefaultHighlightReader *self, Schema *schema,
                 Folder *folder, Snapshot *snapshot, VArray *segments,
                 int32_t seg_tick) {
    Segment *segment;
    Hash *metadata;
    HLReader_init((HighlightReader*)self, schema, folder, snapshot,
                  segments, seg_tick);
    segment = DefHLReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "highlight", 9);
    if (!metadata) {
        // Fall back to the older metadata key.
        metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "term_vectors", 12);
    }

    // Check format.
    if (metadata) {
        Obj *format = Hash_Fetch_Str(metadata, "format", 6);
        if (!format) { THROW(ERR, "Missing 'format' var"); }
        else {
            if (Obj_To_I64(format) != HLWriter_current_file_format) {
                THROW(ERR, "Unsupported highlight data format: %i64",
                      Obj_To_I64(format));
            }
        }
    }

    // Open instreams.
    {
        CharBuf *seg_name = Seg_Get_Name(segment);
        CharBuf *ix_file  = CB_newf("%o/highlight.ix", seg_name);
        CharBuf *dat_file = CB_newf("%o/highlight.dat", seg_name);
        // A missing .ix file is treated as "no highlight data" -- not an
        // error; both instreams are simply left unopened.
        if (Folder_Exists(folder, ix_file)) {
            self->ix_in = Folder_Open_In(folder, ix_file);
            if (!self->ix_in) {
                // Free the filenames and destroy self before rethrowing.
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
            self->dat_in = Folder_Open_In(folder, dat_file);
            if (!self->dat_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
        }
        DECREF(ix_file);
        DECREF(dat_file);
    }

    return self;
}
// Initialize a DefaultDocReader: run superclass init, validate the segment's
// document-storage metadata, then open instreams for the stored-documents
// files (if the segment has any).
DefaultDocReader*
DefDocReader_init(DefaultDocReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, Vector *segments, int32_t seg_tick) {
    Hash *metadata;
    Segment *segment;
    DocReader_init((DocReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    DefaultDocReaderIVARS *const ivars = DefDocReader_IVARS(self);
    segment = DefDocReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "documents", 9);

    if (metadata) {
        String *seg_name = Seg_Get_Name(segment);
        String *ix_file  = Str_newf("%o/documents.ix", seg_name);
        String *dat_file = Str_newf("%o/documents.dat", seg_name);
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);

        // Check format.
        if (!format) { THROW(ERR, "Missing 'format' var"); }
        else {
            int64_t format_val = Json_obj_to_i64(format);
            // Older and newer formats are both rejected; only an exact
            // match with the current file format is readable.
            if (format_val < DocWriter_current_file_format) {
                THROW(ERR, "Obsolete doc storage format %i64; "
                      "Index regeneration is required", format_val);
            }
            else if (format_val != DocWriter_current_file_format) {
                THROW(ERR, "Unsupported doc storage format: %i64",
                      format_val);
            }
        }

        // Get streams.
        // A missing .ix file is treated as "no stored documents" -- not an
        // error; both instreams are simply left unopened.
        if (Folder_Exists(folder, ix_file)) {
            ivars->ix_in = Folder_Open_In(folder, ix_file);
            if (!ivars->ix_in) {
                // Free the filenames and destroy self before rethrowing.
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
            ivars->dat_in = Folder_Open_In(folder, dat_file);
            if (!ivars->dat_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
        }
        DECREF(ix_file);
        DECREF(dat_file);
    }
    return self;
}
static void
test_Window(TestBatchRunner *runner) {
    String *test_filename = SSTR_WRAP_C("_fstest");
    FileWindow *window = FileWindow_new();
    uint32_t i;

    // Build a 4096-byte file of repeated "foo ".
    S_remove(test_filename);
    FSFileHandle *fh = FSFH_open(test_filename,
                                 FH_CREATE | FH_WRITE_ONLY | FH_EXCLUSIVE);
    for (i = 0; i < 1024; i++) { FSFH_Write(fh, "foo ", 4); }
    if (!FSFH_Close(fh)) { RETHROW(INCREF(Err_get_error())); }

    // Reopen for reading.
    DECREF(fh);
    fh = FSFH_open(test_filename, FH_READ_ONLY);
    if (!fh) { RETHROW(INCREF(Err_get_error())); }

    // Invalid window requests fail and set the global error.
    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Window(fh, window, -1, 4),
               "Window() with a negative offset returns false");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Window() with a negative offset sets error");
    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Window(fh, window, 4000, 1000),
               "Window() past EOF returns false");
    TEST_TRUE(runner, Err_get_error() != NULL, "Window() past EOF sets error");

    // A valid window exposes the file's content at the requested offset.
    TEST_TRUE(runner, FSFH_Window(fh, window, 1021, 2),
              "Window() returns true");
    const char *win_buf   = FileWindow_Get_Buf(window);
    int64_t     win_start = FileWindow_Get_Offset(window);
    TEST_TRUE(runner, strncmp(win_buf - win_start + 1021, "oo", 2) == 0,
              "Window()");

    // Release_Window() zeroes out the window's state.
    TEST_TRUE(runner, FSFH_Release_Window(fh, window),
              "Release_Window() returns true");
    TEST_TRUE(runner, FileWindow_Get_Buf(window) == NULL,
              "Release_Window() resets buf");
    TEST_INT_EQ(runner, FileWindow_Get_Offset(window), 0,
                "Release_Window() resets offset");
    TEST_INT_EQ(runner, FileWindow_Get_Len(window), 0,
                "Release_Window() resets len");

    DECREF(window);
    DECREF(fh);
    S_remove(test_filename);
}
// Initialize a LexIndex for one field: open the field's .ixix and .ix
// lexicon-index files and map the offsets array.  On any failure, release
// the filenames and self before propagating the error.
LexIndex*
LexIndex_init(LexIndex *self, Schema *schema, Folder *folder,
              Segment *segment, String *field) {
    int32_t field_num = Seg_Field_Num(segment, field);
    String *seg_name = Seg_Get_Name(segment);
    String *ixix_file = Str_newf("%o/lexicon-%i32.ixix", seg_name, field_num);
    String *ix_file = Str_newf("%o/lexicon-%i32.ix", seg_name, field_num);
    Architecture *arch = Schema_Get_Architecture(schema);

    // Init.
    Lex_init((Lexicon*)self, field);
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    ivars->tinfo = TInfo_new(0);
    ivars->tick = 0;

    // Derive
    ivars->field_type = Schema_Fetch_Type(schema, field);
    if (!ivars->field_type) {
        // Unknown field: clean up before throwing.
        String *mess = MAKE_MESS("Unknown field: '%o'", field);
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        Err_throw_mess(ERR, mess);
    }
    // Take our own refcount on the borrowed FieldType.
    ivars->field_type = (FieldType*)INCREF(ivars->field_type);
    ivars->term_stepper = FType_Make_Term_Stepper(ivars->field_type);
    ivars->ixix_in = Folder_Open_In(folder, ixix_file);
    if (!ivars->ixix_in) {
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->ix_in = Folder_Open_In(folder, ix_file);
    if (!ivars->ix_in) {
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->index_interval = Arch_Index_Interval(arch);
    ivars->skip_interval = Arch_Skip_Interval(arch);
    // The .ixix file is consumed as an array of int64 offsets into the .ix
    // file: `size` is the element count, `offsets` points at the buffer for
    // the whole file.
    ivars->size = (int32_t)(InStream_Length(ivars->ixix_in)
                            / sizeof(int64_t));
    ivars->offsets = (const int64_t*)InStream_Buf(ivars->ixix_in,
                                                  (size_t)InStream_Length(ivars->ixix_in));
    DECREF(ixix_file);
    DECREF(ix_file);
    return self;
}
// Initialize a DefaultHighlightReader: run superclass init, validate the
// segment's highlight metadata, then open instreams for the highlight data
// files (if the segment has any).
DefaultHighlightReader*
DefHLReader_init(DefaultHighlightReader *self, Schema *schema,
                 Folder *folder, Snapshot *snapshot, Vector *segments,
                 int32_t seg_tick) {
    HLReader_init((HighlightReader*)self, schema, folder, snapshot,
                  segments, seg_tick);
    DefaultHighlightReaderIVARS *const ivars = DefHLReader_IVARS(self);
    Segment *segment = DefHLReader_Get_Segment(self);
    Hash *metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "highlight", 9);
    if (!metadata) {
        // Fall back to the older metadata key.
        metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "term_vectors", 12);
    }

    // Check format.
    if (metadata) {
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);
        if (!format) { THROW(ERR, "Missing 'format' var"); }
        else {
            if (Json_obj_to_i64(format) != HLWriter_current_file_format) {
                THROW(ERR, "Unsupported highlight data format: %i64",
                      Json_obj_to_i64(format));
            }
        }
    }

    // Open instreams.
    String *seg_name = Seg_Get_Name(segment);
    String *ix_file  = Str_newf("%o/highlight.ix", seg_name);
    String *dat_file = Str_newf("%o/highlight.dat", seg_name);
    // A missing .ix file is treated as "no highlight data" -- not an error;
    // both instreams are simply left unopened.
    if (Folder_Exists(folder, ix_file)) {
        ivars->ix_in = Folder_Open_In(folder, ix_file);
        if (!ivars->ix_in) {
            // Free the filenames and destroy self before rethrowing.
            Err *error = (Err*)INCREF(Err_get_error());
            DECREF(ix_file);
            DECREF(dat_file);
            DECREF(self);
            RETHROW(error);
        }
        ivars->dat_in = Folder_Open_In(folder, dat_file);
        if (!ivars->dat_in) {
            Err *error = (Err*)INCREF(Err_get_error());
            DECREF(ix_file);
            DECREF(dat_file);
            DECREF(self);
            RETHROW(error);
        }
    }
    DECREF(ix_file);
    DECREF(dat_file);
    return self;
}
static void
test_Window(TestBatchRunner *runner) {
    String *test_filename = (String*)SSTR_WRAP_UTF8("_fstest", 7);
    FileWindow *window = FileWindow_new();
    FileWindowIVARS *const window_ivars = FileWindow_IVARS(window);
    uint32_t j;

    // Build a 4096-byte file of repeated "foo ".
    remove(Str_Get_Ptr8(test_filename));
    FSFileHandle *fh = FSFH_open(test_filename,
                                 FH_CREATE | FH_WRITE_ONLY | FH_EXCLUSIVE);
    for (j = 0; j < 1024; j++) { FSFH_Write(fh, "foo ", 4); }
    if (!FSFH_Close(fh)) { RETHROW(INCREF(Err_get_error())); }

    // Reopen for reading.
    DECREF(fh);
    fh = FSFH_open(test_filename, FH_READ_ONLY);
    if (!fh) { RETHROW(INCREF(Err_get_error())); }

    // Invalid window requests fail and set the global error.
    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Window(fh, window, -1, 4),
               "Window() with a negative offset returns false");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Window() with a negative offset sets error");
    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Window(fh, window, 4000, 1000),
               "Window() past EOF returns false");
    TEST_TRUE(runner, Err_get_error() != NULL, "Window() past EOF sets error");

    // A valid window exposes the file's content at the requested offset.
    TEST_TRUE(runner, FSFH_Window(fh, window, 1021, 2),
              "Window() returns true");
    TEST_TRUE(runner,
              strncmp(window_ivars->buf - window_ivars->offset + 1021,
                      "oo", 2) == 0,
              "Window()");

    // Release_Window() zeroes out the window's state.
    TEST_TRUE(runner, FSFH_Release_Window(fh, window),
              "Release_Window() returns true");
    TEST_TRUE(runner, window_ivars->buf == NULL,
              "Release_Window() resets buf");
    TEST_TRUE(runner, window_ivars->offset == 0,
              "Release_Window() resets offset");
    TEST_TRUE(runner, window_ivars->len == 0, "Release_Window() resets len");

    DECREF(window);
    DECREF(fh);
    remove(Str_Get_Ptr8(test_filename));
}
// Transition the writer from collecting sortable values to serving them:
// either sort the in-memory buffer, or open the temp files written by
// earlier flushes and hand each run its slice of the memory budget.
void
SortFieldWriter_Flip_IMP(SortFieldWriter *self) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    uint32_t num_items = SortFieldWriter_Buffer_Count(self);
    uint32_t num_runs = Vec_Get_Size(ivars->runs);

    if (ivars->flipped) { THROW(ERR, "Can't call Flip() twice"); }
    // Mark flipped immediately so a repeat call fails even if an error is
    // thrown below.  (FIX: a second, redundant `ivars->flipped = true;` at
    // the end of the function has been removed.)
    ivars->flipped = true;

    // Sanity check: buffered items and flushed runs are mutually exclusive.
    if (num_runs && num_items) {
        THROW(ERR, "Sanity check failed: num_runs: %u32 num_items: %u32",
              num_runs, num_items);
    }

    if (num_items) {
        SortFieldWriter_Sort_Buffer(self);
    }
    else if (num_runs) {
        Folder *folder = PolyReader_Get_Folder(ivars->polyreader);
        String *seg_name = Seg_Get_Name(ivars->segment);

        // Open the temp files written during earlier flushes.
        String *ord_path = Str_newf("%o/sort_ord_temp", seg_name);
        ivars->ord_in = Folder_Open_In(folder, ord_path);
        DECREF(ord_path);
        if (!ivars->ord_in) { RETHROW(INCREF(Err_get_error())); }
        if (ivars->var_width) {
            String *ix_path = Str_newf("%o/sort_ix_temp", seg_name);
            ivars->ix_in = Folder_Open_In(folder, ix_path);
            DECREF(ix_path);
            if (!ivars->ix_in) { RETHROW(INCREF(Err_get_error())); }
        }
        String *dat_path = Str_newf("%o/sort_dat_temp", seg_name);
        ivars->dat_in = Folder_Open_In(folder, dat_path);
        DECREF(dat_path);
        if (!ivars->dat_in) { RETHROW(INCREF(Err_get_error())); }

        // Assign streams and a slice of mem_thresh (floor of 64 KiB).
        size_t sub_thresh = ivars->mem_thresh / num_runs;
        if (sub_thresh < 65536) { sub_thresh = 65536; }
        for (uint32_t i = 0; i < num_runs; i++) {
            SortFieldWriter *run
                = (SortFieldWriter*)Vec_Fetch(ivars->runs, i);
            S_flip_run(run, sub_thresh, ivars->ord_in, ivars->ix_in,
                       ivars->dat_in);
        }
    }
}
static void
test_spew_and_slurp(TestBatch *batch) {
    Obj *dump = S_make_dump();
    Folder *folder = (Folder*)RAMFolder_new(NULL);
    CharBuf *foo = (CharBuf*)ZCB_WRAP_STR("foo", 3);

    // Round trip: spew a dump to a file, then slurp it back.
    bool_t ok = Json_spew_json(dump, folder, foo);
    TEST_TRUE(batch, ok, "spew_json returns true on success");
    TEST_TRUE(batch, Folder_Exists(folder, foo), "spew_json wrote file");
    Obj *got = Json_slurp_json(folder, foo);
    TEST_TRUE(batch, got && Obj_Equals(dump, got),
              "Round trip through spew_json and slurp_json");
    DECREF(got);

    // spew_json must refuse to clobber an existing file.
    Err_set_error(NULL);
    ok = Json_spew_json(dump, folder, foo);
    TEST_FALSE(batch, ok, "Can't spew_json when file exists");
    TEST_TRUE(batch, Err_get_error() != NULL,
              "Failed spew_json sets Err_error");

    // slurp_json on a missing file fails cleanly.
    Err_set_error(NULL);
    CharBuf *bar = (CharBuf*)ZCB_WRAP_STR("bar", 3);
    got = Json_slurp_json(folder, bar);
    TEST_TRUE(batch, got == NULL,
              "slurp_json returns NULL when file doesn't exist");
    TEST_TRUE(batch, Err_get_error() != NULL,
              "Failed slurp_json sets Err_error");

    // slurp_json on a file full of garbage fails cleanly.
    CharBuf *boffo = (CharBuf*)ZCB_WRAP_STR("boffo", 5);
    FileHandle *fh
        = Folder_Open_FileHandle(folder, boffo, FH_CREATE | FH_WRITE_ONLY);
    FH_Write(fh, "garbage", 7);
    DECREF(fh);
    Err_set_error(NULL);
    got = Json_slurp_json(folder, boffo);
    TEST_TRUE(batch, got == NULL,
              "slurp_json returns NULL when file doesn't contain valid JSON");
    TEST_TRUE(batch, Err_get_error() != NULL,
              "Failed slurp_json sets Err_error");
    DECREF(got);

    DECREF(dump);
    DECREF(folder);
}
DirHandle*
RAMFolder_local_open_dir(RAMFolder *self) {
    // Delegate to the RAMDirHandle constructor; on failure, annotate the
    // global error with this stack frame before passing NULL up.
    RAMDirHandle *dh = RAMDH_new(self);
    if (dh == NULL) {
        ERR_ADD_FRAME(Err_get_error());
    }
    return (DirHandle*)dh;
}
// Refill the stream's read buffer so that `amount` bytes starting at the
// current virtual position are addressable, or throw on over-read/failure.
static void
S_fill(InStream *self, int64_t amount) {
    InStreamIVARS *const ivars = InStream_IVARS(self);
    FileWindow *const window = ivars->window;
    const int64_t virtual_file_pos = SI_tell(self);
    // `offset` is where this (possibly sliced) stream begins within the
    // underlying file; `len` is the slice's length.
    const int64_t real_file_pos = virtual_file_pos + ivars->offset;
    const int64_t remaining = ivars->len - virtual_file_pos;

    // Throw an error if the requested amount would take us beyond EOF.
    if (amount > remaining) {
        THROW(ERR,
              "Read past EOF of %o (pos: %u64 len: %u64 request: %u64)",
              ivars->filename, virtual_file_pos, ivars->len, amount);
    }

    // Make the request.
    if (FH_Window(ivars->file_handle, window, real_file_pos, amount)) {
        char *fw_buf = FileWindow_Get_Buf(window);
        int64_t fw_offset = FileWindow_Get_Offset(window);
        int64_t fw_len = FileWindow_Get_Len(window);
        char *const window_limit = fw_buf + fw_len;
        ivars->buf = fw_buf
                     - fw_offset         // theoretical start of real file
                     + ivars->offset     // top of virtual file
                     + virtual_file_pos; // position within virtual file
        // Clamp the limit so reads never run past the slice's end, even
        // when the window extends further.
        ivars->limit = window_limit - ivars->buf > remaining
                       ? ivars->buf + remaining
                       : window_limit;
    }
    else {
        // Window failed: append the filename to the error and rethrow.
        Err *error = Err_get_error();
        CB_catf(Err_Get_Mess(error), " (%o)", ivars->filename);
        RETHROW(INCREF(error));
    }
}
void
Indexer_commit(Indexer *self) {
    // Safety check: committing twice (the write lock is released on the
    // first commit) is a usage error.
    if (!self->write_lock) {
        THROW(ERR, "Can't call commit() more than once");
    }

    if (!self->prepared) {
        Indexer_Prepare_Commit(self);
    }

    if (self->needs_commit) {
        // Strip the ".temp" suffix and rename the temp snapshot file into
        // place, making the commit official.
        CharBuf *temp_snapfile = CB_Clone(self->snapfile);
        CB_Chop(self->snapfile, sizeof(".temp") - 1);
        Snapshot_Set_Path(self->snapshot, self->snapfile);
        bool_t renamed
            = Folder_Rename(self->folder, temp_snapfile, self->snapfile);
        DECREF(temp_snapfile);
        if (!renamed) {
            RETHROW(INCREF(Err_get_error()));
        }

        // Purge obsolete files.
        FilePurger_Purge(self->file_purger);
    }

    // Release locks, invalidating the Indexer.
    S_release_merge_lock(self);
    S_release_write_lock(self);
}
ByteBuf*
Folder_slurp_file(Folder *self, const CharBuf *path) {
    // Read an entire file into a freshly allocated, NUL-terminated ByteBuf.
    InStream *instream = Folder_Open_In(self, path);
    ByteBuf *retval = NULL;

    if (!instream) {
        RETHROW(INCREF(Err_get_error()));
    }
    else {
        uint64_t length = InStream_Length(instream);
        if (length >= SIZE_MAX) {
            // Refuse files whose size cannot be represented as a size_t.
            InStream_Close(instream);
            DECREF(instream);
            THROW(ERR, "File %o is too big to slurp (%u64 bytes)", path,
                  length);
        }
        else {
            size_t size = (size_t)length;
            char *ptr = (char*)MALLOCATE(size + 1);
            InStream_Read_Bytes(instream, ptr, size);
            ptr[size] = '\0';
            // The ByteBuf takes ownership of the allocation.
            retval = BB_new_steal_bytes(ptr, size, size + 1);
            InStream_Close(instream);
            DECREF(instream);
        }
    }

    return retval;
}
/* This method exists as a hook for CompoundFileReader to override; it is
 * necessary because calling CFReader_Local_Open_FileHandle() won't find
 * virtual files.  No other class should need to override it. */
InStream*
Folder_local_open_in(Folder *self, const CharBuf *name) {
    FileHandle *fh
        = Folder_Local_Open_FileHandle(self, name, FH_READ_ONLY);
    if (fh == NULL) {
        // The FileHandle constructor set the global error; add our frame.
        ERR_ADD_FRAME(Err_get_error());
        return NULL;
    }

    InStream *instream = InStream_open((Obj*)fh);
    DECREF(fh);
    if (instream == NULL) {
        ERR_ADD_FRAME(Err_get_error());
    }
    return instream;
}
static void
test_Local_Open_In(TestBatchRunner *runner) {
    Folder *real_folder = S_folder_with_contents();
    CompoundFileReader *cf_reader = CFReader_open(real_folder);

    // Virtual file stored inside the compound file.
    InStream *instream = CFReader_Local_Open_In(cf_reader, foo);
    TEST_TRUE(runner, instream != NULL, "Local_Open_In for virtual file");
    TEST_TRUE(runner,
              Str_Starts_With(InStream_Get_Filename(instream),
                              CFReader_Get_Path(cf_reader)),
              "InStream's path includes directory");
    DECREF(instream);

    // Real file living alongside the compound file.
    OutStream *outstream = CFReader_Open_Out(cf_reader, baz);
    OutStream_Write_Bytes(outstream, "baz", 3);
    OutStream_Close(outstream);
    DECREF(outstream);
    instream = CFReader_Local_Open_In(cf_reader, baz);
    TEST_TRUE(runner, instream != NULL,
              "Local_Open_In pass-through for real file");
    DECREF(instream);

    // Non-existent file fails and sets the global error.
    Err_set_error(NULL);
    instream = CFReader_Local_Open_In(cf_reader, stuff);
    TEST_TRUE(runner, instream == NULL,
              "Local_Open_In for non-existent file returns NULL");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Local_Open_In for non-existent file sets global error");

    DECREF(cf_reader);
    DECREF(real_folder);
}
// Write a fresh deletions file for every segment whose bit vector was
// modified, then record deletions metadata on the segment being written.
void
DefDelWriter_Finish_IMP(DefaultDeletionsWriter *self) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Folder *const folder = ivars->folder;

    for (uint32_t i = 0, max = VA_Get_Size(ivars->seg_readers);
         i < max; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(ivars->seg_readers, i);
        if (ivars->updated[i]) {
            BitVector *deldocs = (BitVector*)VA_Fetch(ivars->bit_vecs, i);
            int32_t doc_max = SegReader_Doc_Max(seg_reader);
            // Round (doc_max + 1) bits up to a whole number of bytes;
            // new_max is the highest bit index that fits in those bytes.
            double used = (doc_max + 1) / 8.0;
            uint32_t byte_size = (uint32_t)ceil(used);
            uint32_t new_max = byte_size * 8 - 1;
            String *filename = S_del_filename(self, seg_reader);
            OutStream *outstream = Folder_Open_Out(folder, filename);
            if (!outstream) { RETHROW(INCREF(Err_get_error())); }
            // Ensure that we have 1 bit for each doc in segment.
            BitVec_Grow(deldocs, new_max);
            // Write deletions data and clean up.
            OutStream_Write_Bytes(outstream,
                                  (char*)BitVec_Get_Raw_Bits(deldocs),
                                  byte_size);
            OutStream_Close(outstream);
            DECREF(outstream);
            DECREF(filename);
        }
    }

    Seg_Store_Metadata_Utf8(ivars->segment, "deletions", 9,
                            (Obj*)DefDelWriter_Metadata(self));
}
// Open an InStream for `name`.  Virtual files (listed in `records`) are
// served as slices of the compound stream; anything else passes through to
// the real folder.
InStream*
CFReader_Local_Open_In_IMP(CompoundFileReader *self, String *name) {
    CompoundFileReaderIVARS *const ivars = CFReader_IVARS(self);
    Hash *entry = (Hash*)Hash_Fetch(ivars->records, name);

    if (!entry) {
        // Not a virtual file; delegate to the real folder.
        InStream *instream = Folder_Local_Open_In(ivars->real_folder, name);
        if (!instream) {
            ERR_ADD_FRAME(Err_get_error());
        }
        return instream;
    }
    else {
        // Virtual file: its record holds the slice's offset and length
        // within the compound stream.
        Obj *len = Hash_Fetch_Utf8(entry, "length", 6);
        Obj *offset = Hash_Fetch_Utf8(entry, "offset", 6);
        if (!len || !offset) {
            Err_set_error(Err_new(Str_newf("Malformed entry for '%o' in '%o'",
                                           name,
                                           Folder_Get_Path(ivars->real_folder))));
            return NULL;
        }
        else if (Str_Get_Size(ivars->path)) {
            // Qualify the sub-stream's name with this folder's path.
            String *fullpath = Str_newf("%o/%o", ivars->path, name);
            InStream *instream = InStream_Reopen(ivars->instream, fullpath,
                                                 Obj_To_I64(offset),
                                                 Obj_To_I64(len));
            DECREF(fullpath);
            return instream;
        }
        else {
            return InStream_Reopen(ivars->instream, name,
                                   Obj_To_I64(offset), Obj_To_I64(len));
        }
    }
}
String*
IxFileNames_latest_snapshot(Folder *folder) {
    // Scan the top level of the folder for "snapshot_XXX.json" entries and
    // return a copy of the one with the highest generation, or NULL.
    DirHandle *dh = Folder_Open_Dir(folder, NULL);
    if (!dh) { RETHROW(INCREF(Err_get_error())); }

    String  *retval     = NULL;
    uint64_t latest_gen = 0;
    while (DH_Next(dh)) {
        String *entry = DH_Get_Entry(dh);
        if (Str_Starts_With_Utf8(entry, "snapshot_", 9)
            && Str_Ends_With_Utf8(entry, ".json", 5)
           ) {
            uint64_t gen = IxFileNames_extract_gen(entry);
            if (gen > latest_gen) {
                latest_gen = gen;
                DECREF(retval);
                retval = Str_Clone(entry);
            }
        }
        DECREF(entry);
    }

    DECREF(dh);
    return retval;
}
static void
test_Close(TestBatchRunner *runner) {
    String *test_filename = SSTR_WRAP_C("_fstest");

    // Close() on a healthy write-only handle succeeds.
    S_remove(test_filename);
    FSFileHandle *fh = FSFH_open(test_filename,
                                 FH_CREATE | FH_WRITE_ONLY | FH_EXCLUSIVE);
    TEST_TRUE(runner, FSFH_Close(fh), "Close returns true for write-only");
    DECREF(fh);

    // Simulate an OS error when closing the file descriptor.  This
    // approximates what would happen if, say, we run out of disk space.
    S_remove(test_filename);
    fh = FSFH_open(test_filename, FH_CREATE | FH_WRITE_ONLY | FH_EXCLUSIVE);
#ifdef _MSC_VER
    SKIP(runner, 2, "LUCY-155");
#else
    int saved_fd = FSFH_Set_FD(fh, -1);
    Err_set_error(NULL);
    bool closed = FSFH_Close(fh);
    TEST_FALSE(runner, closed, "Failed Close() returns false");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Failed Close() sets global error");
    FSFH_Set_FD(fh, saved_fd);
#endif /* _MSC_VER */
    DECREF(fh);

    // Close() on a read-only handle succeeds too.
    fh = FSFH_open(test_filename, FH_READ_ONLY);
    TEST_TRUE(runner, FSFH_Close(fh), "Close returns true for read-only");
    DECREF(fh);
    S_remove(test_filename);
}
static void
test_stemming(TestBatchRunner *runner) {
    // Drive the Snowball stemmer against the bundled per-language test data:
    // each entry maps an ISO language code to parallel words/stems lists.
    FSFolder *modules_folder = TestUtils_modules_folder();
    String *path = Str_newf("analysis/snowstem/source/test/tests.json");
    Hash *tests = (Hash*)Json_slurp_json((Folder*)modules_folder, path);
    if (!tests) {
        // FIX: take a refcount before rethrowing -- RETHROW consumes one,
        // matching every other RETHROW(INCREF(Err_get_error())) call site
        // in this codebase.
        RETHROW(INCREF(Err_get_error()));
    }

    String *iso;
    Hash *lang_data;
    Hash_Iterate(tests);
    while (Hash_Next(tests, (Obj**)&iso, (Obj**)&lang_data)) {
        VArray *words = (VArray*)Hash_Fetch_Utf8(lang_data, "words", 5);
        VArray *stems = (VArray*)Hash_Fetch_Utf8(lang_data, "stems", 5);
        SnowballStemmer *stemmer = SnowStemmer_new(iso);
        for (uint32_t i = 0, max = VA_Get_Size(words); i < max; i++) {
            String *word = (String*)VA_Fetch(words, i);
            VArray *got = SnowStemmer_Split(stemmer, word);
            String *stem = (String*)VA_Fetch(got, 0);
            TEST_TRUE(runner,
                      stem
                      && Str_Is_A(stem, STRING)
                      && Str_Equals(stem, VA_Fetch(stems, i)),
                      "Stem %s: %s", Str_Get_Ptr8(iso), Str_Get_Ptr8(word)
                     );
            DECREF(got);
        }
        DECREF(stemmer);
    }

    DECREF(tests);
    DECREF(modules_folder);
    DECREF(path);
}
String*
IxManager_Make_Snapshot_Filename_IMP(IndexManager *self) {
    IndexManagerIVARS *const ivars = IxManager_IVARS(self);
    Folder *folder = (Folder*)CERTIFY(ivars->folder, FOLDER);

    // Find the highest generation among existing snapshot files.
    DirHandle *dh = Folder_Open_Dir(folder, NULL);
    if (!dh) { RETHROW(INCREF(Err_get_error())); }
    uint64_t max_gen = 0;
    while (DH_Next(dh)) {
        String *entry = DH_Get_Entry(dh);
        if (Str_Starts_With_Utf8(entry, "snapshot_", 9)
            && Str_Ends_With_Utf8(entry, ".json", 5)
           ) {
            uint64_t gen = IxFileNames_extract_gen(entry);
            if (gen > max_gen) { max_gen = gen; }
        }
        DECREF(entry);
    }
    DECREF(dh);

    // Encode the next generation in base 36, as other index filenames do.
    uint64_t new_gen = max_gen + 1;
    char base36[StrHelp_MAX_BASE36_BYTES];
    StrHelp_to_base36(new_gen, &base36);
    return Str_newf("snapshot_%s.json", &base36);
}
// Refill the stream's read buffer so that `amount` bytes starting at the
// current virtual position are addressable, or throw on over-read/failure.
static void
S_fill(InStream *self, int64_t amount) {
    FileWindow *const window = self->window;
    const int64_t virtual_file_pos = SI_tell(self);
    // `offset` is where this (possibly sliced) stream begins within the
    // underlying file; `len` is the slice's length.
    const int64_t real_file_pos = virtual_file_pos + self->offset;
    const int64_t remaining = self->len - virtual_file_pos;

    // Throw an error if the requested amount would take us beyond EOF.
    if (amount > remaining) {
        THROW(ERR,
              "Read past EOF of %o (pos: %u64 len: %u64 request: %u64)",
              self->filename, virtual_file_pos, self->len, amount);
    }

    // Make the request.
    if (FH_Window(self->file_handle, window, real_file_pos, amount)) {
        char *const window_limit = window->buf + window->len;
        self->buf = window->buf
                    - window->offset    // theoretical start of real file
                    + self->offset      // top of virtual file
                    + virtual_file_pos; // position within virtual file
        // Clamp the limit so reads never run past the slice's end, even
        // when the window extends further.
        self->limit = window_limit - self->buf > remaining
                      ? self->buf + remaining
                      : window_limit;
    }
    else {
        // Window failed: append the filename to the error and rethrow.
        Err *error = Err_get_error();
        CB_catf(Err_Get_Mess(error), " (%o)", self->filename);
        RETHROW(INCREF(error));
    }
}
CharBuf*
Json_to_json(Obj *dump) {
    // Validate object type, only allowing hashes and arrays per JSON spec
    // -- unless the module-level `tolerant` flag loosens that restriction.
    if (!(dump && (Obj_Is_A(dump, HASH) || Obj_Is_A(dump, VARRAY)))
        && !tolerant
       ) {
        CharBuf *class_name = dump ? Obj_Get_Class_Name(dump) : NULL;
        CharBuf *mess = MAKE_MESS("Illegal top-level object type: %o",
                                  class_name);
        Err_set_error(Err_new(mess));
        return NULL;
    }

    // Encode.
    CharBuf *json = CB_new(31);
    if (!S_to_json(dump, json, 0)) {
        DECREF(json);
        ERR_ADD_FRAME(Err_get_error());
        json = NULL;
    }
    else {
        // Append newline.
        CB_Cat_Trusted_Str(json, "\n", 1);
    }

    return json;
}