// Build the path of a deletions bit-vector file.  The file lives inside the
// segment currently being written, but is named after the segment whose
// documents it masks.
static String*
S_del_filename(DefaultDeletionsWriter *self, SegReader *target_reader) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    String *writer_seg_name = Seg_Get_Name(ivars->segment);
    String *target_seg_name
        = Seg_Get_Name(SegReader_Get_Segment(target_reader));
    return Str_newf("%o/deletions-%o.bv", writer_seg_name, target_seg_name);
}
// Exercise Seg_Write_File/Seg_Read_File round-tripping: doc count, field
// numbering, and stored metadata must all survive serialization through a
// RAMFolder.
static void
test_Write_File_and_Read_File(TestBatchRunner *runner) {
    RAMFolder *folder  = RAMFolder_new(NULL);
    Segment   *segment = Seg_new(100);
    Segment   *got     = Seg_new(100);
    String    *meta;
    String    *flotsam = (String*)SSTR_WRAP_UTF8("flotsam", 7);
    String    *jetsam  = (String*)SSTR_WRAP_UTF8("jetsam", 6);
    Seg_Set_Count(segment, 111);
    Seg_Store_Metadata_Utf8(segment, "foo", 3, (Obj*)Str_newf("bar"));
    Seg_Add_Field(segment, flotsam);
    Seg_Add_Field(segment, jetsam);

    // Write `segment` out, then read it back into the fresh `got` object.
    RAMFolder_MkDir(folder, Seg_Get_Name(segment));
    Seg_Write_File(segment, (Folder*)folder);
    Seg_Read_File(got, (Folder*)folder);

    TEST_TRUE(runner, Seg_Get_Count(got) == Seg_Get_Count(segment),
              "Round-trip count through file");
    TEST_TRUE(runner,
              Seg_Field_Num(got, jetsam) == Seg_Field_Num(segment, jetsam),
              "Round trip field names through file");
    meta = (String*)Seg_Fetch_Metadata_Utf8(got, "foo", 3);
    TEST_TRUE(runner,
              meta
              && Str_Is_A(meta, STRING)
              && Str_Equals_Utf8(meta, "bar", 3),
              "Round trip metadata through file");

    DECREF(got);
    DECREF(segment);
    DECREF(folder);
}
// Lazily open the highlight outstreams the first time data is written.
// Returns the .dat outstream; also opens the .ix outstream as a side
// effect and writes a placeholder entry for the invalid doc id 0.
static OutStream*
S_lazy_init(HighlightWriter *self) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);
    if (!ivars->dat_out) {
        Segment *segment  = ivars->segment;
        Folder  *folder   = ivars->folder;
        String  *seg_name = Seg_Get_Name(segment);

        // Open outstreams.
        String *ix_file = Str_newf("%o/highlight.ix", seg_name);
        ivars->ix_out = Folder_Open_Out(folder, ix_file);
        DECREF(ix_file);
        if (!ivars->ix_out) { RETHROW(INCREF(Err_get_error())); }
        String *dat_file = Str_newf("%o/highlight.dat", seg_name);
        ivars->dat_out = Folder_Open_Out(folder, dat_file);
        DECREF(dat_file);
        if (!ivars->dat_out) { RETHROW(INCREF(Err_get_error())); }

        // Go past invalid doc 0.
        OutStream_Write_I64(ivars->ix_out, 0);
    }
    return ivars->dat_out;
}
/* Prepare the LexiconWriter to write terms for one field: set the
 * per-field file names, register them with the snapshot, open the three
 * outstreams (.dat, .ix, .ixix), and reset the per-field counters and
 * last-term state. */
void
LexWriter_start_field(LexiconWriter *self, i32_t field_num) {
    CharBuf  *const seg_name = Seg_Get_Name(self->segment);
    Folder   *const folder   = self->folder;
    Snapshot *const snapshot = LexWriter_Get_Snapshot(self);

    /* Open outstreams. */
    CB_setf(self->dat_file,  "%o/lexicon-%i32.dat",  seg_name, field_num);
    CB_setf(self->ix_file,   "%o/lexicon-%i32.ix",   seg_name, field_num);
    CB_setf(self->ixix_file, "%o/lexicon-%i32.ixix", seg_name, field_num);
    Snapshot_Add_Entry(snapshot, self->dat_file);
    Snapshot_Add_Entry(snapshot, self->ix_file);
    Snapshot_Add_Entry(snapshot, self->ixix_file);
    self->dat_out  = Folder_Open_Out(folder, self->dat_file);
    self->ix_out   = Folder_Open_Out(folder, self->ix_file);
    self->ixix_out = Folder_Open_Out(folder, self->ixix_file);
    if (!self->dat_out)  { THROW("Can't open %o", self->dat_file); }
    if (!self->ix_out)   { THROW("Can't open %o", self->ix_file); }
    if (!self->ixix_out) { THROW("Can't open %o", self->ixix_file); }

    /* Initialize count and ix_count, TermInfo and last term text. */
    self->count    = 0;
    self->ix_count = 0;
    TInfo_Reset(self->last_tinfo);
    CB_Set_Size(self->last_text, 0);
}
// Return a Matcher over the deleted docs for `seg_reader`'s segment, or
// NULL if that segment has no deletions.  Throws if the SegReader's
// segment is not among the readers this writer tracks.
Matcher*
DefDelWriter_Seg_Deletions_IMP(DefaultDeletionsWriter *self,
                               SegReader *seg_reader) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Matcher *deletions = NULL;
    Segment *segment = SegReader_Get_Segment(seg_reader);
    String *seg_name = Seg_Get_Name(segment);
    // Map segment name -> tick (index) into our parallel arrays.
    Integer32 *tick_obj = (Integer32*)Hash_Fetch(ivars->name_to_tick,
                                                 (Obj*)seg_name);
    int32_t tick = tick_obj ? Int32_Get_Value(tick_obj) : 0;
    SegReader *candidate = tick_obj
                           ? (SegReader*)VA_Fetch(ivars->seg_readers, tick)
                           : NULL;
    if (tick_obj) {
        DeletionsReader *del_reader
            = (DeletionsReader*)SegReader_Obtain(
                  candidate, Class_Get_Name(DELETIONSREADER));
        // Only build a matcher when there are fresh updates or existing
        // deletions; otherwise NULL signals "no deletions".
        if (ivars->updated[tick] || DelReader_Del_Count(del_reader)) {
            BitVector *deldocs = (BitVector*)VA_Fetch(ivars->bit_vecs, tick);
            deletions = (Matcher*)BitVecMatcher_new(deldocs);
        }
    }
    else { // Sanity check.
        THROW(ERR, "Couldn't find SegReader %o", seg_reader);
    }
    return deletions;
}
// Extend the superclass metadata with a "files" hash recording, for each
// segment whose deletions changed this session, the deletion count and
// the name of the freshly written bit-vector file.
Hash*
DefDelWriter_Metadata_IMP(DefaultDeletionsWriter *self) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    DefDelWriter_Metadata_t super_meta
        = (DefDelWriter_Metadata_t)SUPER_METHOD_PTR(
              DEFAULTDELETIONSWRITER, LUCY_DefDelWriter_Metadata);
    Hash *const metadata = super_meta(self);
    Hash *const files = Hash_new(0);
    for (uint32_t i = 0, max = VA_Get_Size(ivars->seg_readers);
         i < max; i++) {
        SegReader *seg_reader
            = (SegReader*)VA_Fetch(ivars->seg_readers, i);
        // Only record segments whose deletions were modified.
        if (ivars->updated[i]) {
            BitVector *deldocs = (BitVector*)VA_Fetch(ivars->bit_vecs, i);
            Segment *segment = SegReader_Get_Segment(seg_reader);
            Hash *mini_meta = Hash_new(2);
            Hash_Store_Utf8(mini_meta, "count", 5,
                            (Obj*)Str_newf("%u32",
                                           (uint32_t)BitVec_Count(deldocs)));
            Hash_Store_Utf8(mini_meta, "filename", 8,
                            (Obj*)S_del_filename(self, seg_reader));
            Hash_Store(files, (Obj*)Seg_Get_Name(segment), (Obj*)mini_meta);
        }
    }
    Hash_Store_Utf8(metadata, "files", 5, (Obj*)files);
    return metadata;
}
/* Exercise Seg_Write_File/Seg_Read_File round-tripping: doc count, field
 * numbering, and stored metadata must all survive serialization through a
 * RAMFolder. */
static void
test_Write_File_and_Read_File(TestBatch *batch) {
    RAMFolder *folder  = RAMFolder_new(NULL);
    Segment   *segment = Seg_new(100);
    Segment   *got     = Seg_new(100);
    CharBuf   *meta;
    CharBuf   *flotsam = (CharBuf*)ZCB_WRAP_STR("flotsam", 7);
    CharBuf   *jetsam  = (CharBuf*)ZCB_WRAP_STR("jetsam", 6);
    Seg_Set_Count(segment, 111);
    Seg_Store_Metadata_Str(segment, "foo", 3, (Obj*)CB_newf("bar"));
    Seg_Add_Field(segment, flotsam);
    Seg_Add_Field(segment, jetsam);

    /* Write `segment` out, then read it back into the fresh `got`. */
    RAMFolder_MkDir(folder, Seg_Get_Name(segment));
    Seg_Write_File(segment, (Folder*)folder);
    Seg_Read_File(got, (Folder*)folder);

    TEST_TRUE(batch, Seg_Get_Count(got) == Seg_Get_Count(segment),
              "Round-trip count through file");
    TEST_TRUE(batch,
              Seg_Field_Num(got, jetsam) == Seg_Field_Num(segment, jetsam),
              "Round trip field names through file");
    meta = (CharBuf*)Seg_Fetch_Metadata_Str(got, "foo", 3);
    TEST_TRUE(batch,
              meta
              && CB_Is_A(meta, CHARBUF)
              && CB_Equals_Str(meta, "bar", 3),
              "Round trip metadata through file");

    DECREF(got);
    DECREF(segment);
    DECREF(folder);
}
// Lazily open the highlight outstreams on first use.  Returns the .dat
// outstream; opens the .ix outstream as a side effect and writes a
// placeholder entry for the invalid doc id 0.
static OutStream*
S_lazy_init(HighlightWriter *self) {
    if (!self->dat_out) {
        Segment *segment  = self->segment;
        Folder  *folder   = self->folder;
        CharBuf *seg_name = Seg_Get_Name(segment);

        // Open outstreams.
        {
            CharBuf *ix_file = CB_newf("%o/highlight.ix", seg_name);
            self->ix_out = Folder_Open_Out(folder, ix_file);
            DECREF(ix_file);
            if (!self->ix_out) { RETHROW(INCREF(Err_get_error())); }
        }
        {
            CharBuf *dat_file = CB_newf("%o/highlight.dat", seg_name);
            self->dat_out = Folder_Open_Out(folder, dat_file);
            DECREF(dat_file);
            if (!self->dat_out) { RETHROW(INCREF(Err_get_error())); }
        }

        // Go past invalid doc 0.
        OutStream_Write_I64(self->ix_out, 0);
    }
    return self->dat_out;
}
// Prepare the LexiconWriter to write terms for one field: derive the
// per-field file names, open the .dat/.ix/.ixix outstreams, and reset the
// counters and steppers used while writing that field's lexicon.
void
LexWriter_start_field(LexiconWriter *self, int32_t field_num) {
    Segment   *const segment  = LexWriter_Get_Segment(self);
    Folder    *const folder   = LexWriter_Get_Folder(self);
    Schema    *const schema   = LexWriter_Get_Schema(self);
    CharBuf   *const seg_name = Seg_Get_Name(segment);
    CharBuf   *const field    = Seg_Field_Name(segment, field_num);
    FieldType *const type     = Schema_Fetch_Type(schema, field);

    // Open outstreams.
    CB_setf(self->dat_file,  "%o/lexicon-%i32.dat",  seg_name, field_num);
    CB_setf(self->ix_file,   "%o/lexicon-%i32.ix",   seg_name, field_num);
    CB_setf(self->ixix_file, "%o/lexicon-%i32.ixix", seg_name, field_num);
    self->dat_out = Folder_Open_Out(folder, self->dat_file);
    if (!self->dat_out) { RETHROW(INCREF(Err_get_error())); }
    self->ix_out = Folder_Open_Out(folder, self->ix_file);
    if (!self->ix_out) { RETHROW(INCREF(Err_get_error())); }
    self->ixix_out = Folder_Open_Out(folder, self->ixix_file);
    if (!self->ixix_out) { RETHROW(INCREF(Err_get_error())); }

    // Initialize count and ix_count, term stepper and term info stepper.
    self->count    = 0;
    self->ix_count = 0;
    self->term_stepper = FType_Make_Term_Stepper(type);
    TermStepper_Reset(self->tinfo_stepper);
}
/* Lazily open the highlight outstreams on first use, registering both
 * files with the snapshot.  Returns the .dat outstream; also writes a
 * placeholder entry so no valid data lands at the invalid doc id 0. */
static OutStream*
S_lazy_init(HighlightWriter *self) {
    if (!self->dat_out) {
        Segment  *segment  = self->segment;
        Folder   *folder   = self->folder;
        Snapshot *snapshot = HLWriter_Get_Snapshot(self);
        CharBuf  *seg_name = Seg_Get_Name(segment);
        CharBuf  *ix_file  = CB_newf("%o/highlight.ix", seg_name);
        CharBuf  *dat_file = CB_newf("%o/highlight.dat", seg_name);

        /* Open outstreams. */
        Snapshot_Add_Entry(snapshot, ix_file);
        Snapshot_Add_Entry(snapshot, dat_file);
        self->ix_out  = Folder_Open_Out(folder, ix_file);
        self->dat_out = Folder_Open_Out(folder, dat_file);
        if (!self->ix_out)  { THROW("Can't open %o", ix_file); }
        if (!self->dat_out) { THROW("Can't open %o", dat_file); }
        DECREF(ix_file);
        DECREF(dat_file);

        /* Go past invalid doc 0. */
        OutStream_Write_U64(self->ix_out, 0);
    }
    return self->dat_out;
}
// Initialize a SegReader over one segment: cache the segment's vitals,
// spin up its sub-components, and record its deletion count.  If a
// component fails to initialize, self is cleaned up and the error
// rethrown.
SegReader*
SegReader_init(SegReader *self, Schema *schema, Folder *folder,
               Snapshot *snapshot, Vector *segments, int32_t seg_tick) {
    Segment *segment;
    IxReader_init((IndexReader*)self, schema, folder, snapshot, segments,
                  seg_tick, NULL);
    SegReaderIVARS *const ivars = SegReader_IVARS(self);
    segment = SegReader_Get_Segment(self);

    // Cache segment vitals.
    ivars->doc_max  = (int32_t)Seg_Get_Count(segment);
    ivars->seg_name = (String*)INCREF(Seg_Get_Name(segment));
    ivars->seg_num  = Seg_Get_Number(segment);

    // Initialize sub-readers inside an error trap so self doesn't leak.
    Err *error = Err_trap(S_try_init_components, self);
    if (error) {
        // An error occurred, so clean up self and rethrow the exception.
        DECREF(self);
        RETHROW(error);
    }

    DeletionsReader *del_reader
        = (DeletionsReader*)Hash_Fetch(ivars->components,
                                       Class_Get_Name(DELETIONSREADER));
    ivars->del_count = del_reader ? DelReader_Del_Count(del_reader) : 0;
    return self;
}
// Switch the PostingPool from gathering mode to fetching mode: open the
// shared temp instreams, flush any leftover buffered postings into a
// final run, and hand each run its input streams plus a slice of the
// memory threshold.
void
PostPool_Flip_IMP(PostingPool *self) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    uint32_t num_runs = VA_Get_Size(ivars->runs);
    // Divide the memory budget evenly among runs (whole budget if none).
    uint32_t sub_thresh = num_runs > 0
                          ? ivars->mem_thresh / num_runs
                          : ivars->mem_thresh;

    if (num_runs) {
        Folder *folder = PolyReader_Get_Folder(ivars->polyreader);
        String *seg_name = Seg_Get_Name(ivars->segment);
        String *lex_temp_path  = Str_newf("%o/lextemp", seg_name);
        String *post_temp_path = Str_newf("%o/ptemp", seg_name);
        ivars->lex_temp_in = Folder_Open_In(folder, lex_temp_path);
        if (!ivars->lex_temp_in) { RETHROW(INCREF(Err_get_error())); }
        ivars->post_temp_in = Folder_Open_In(folder, post_temp_path);
        if (!ivars->post_temp_in) { RETHROW(INCREF(Err_get_error())); }
        DECREF(lex_temp_path);
        DECREF(post_temp_path);
    }

    PostPool_Sort_Buffer(self);
    if (num_runs && (ivars->buf_max - ivars->buf_tick) > 0) {
        uint32_t num_items = PostPool_Buffer_Count(self);
        // Cheap imitation of flush. FIXME.
        PostingPool *run
            = PostPool_new(ivars->schema, ivars->snapshot, ivars->segment,
                           ivars->polyreader, ivars->field,
                           ivars->lex_writer, ivars->mem_pool,
                           ivars->lex_temp_out, ivars->post_temp_out,
                           ivars->skip_out);
        PostPool_Grow_Buffer(run, num_items);
        PostingPoolIVARS *const run_ivars = PostPool_IVARS(run);
        // Transfer the remaining buffered postings into the new run.
        memcpy(run_ivars->buffer, (ivars->buffer) + ivars->buf_tick,
               num_items * sizeof(Obj*));
        run_ivars->buf_max = num_items;
        PostPool_Add_Run(self, (SortExternal*)run);
        ivars->buf_tick = 0;
        ivars->buf_max = 0;
    }

    // Assign.
    for (uint32_t i = 0; i < num_runs; i++) {
        PostingPool *run = (PostingPool*)VA_Fetch(ivars->runs, i);
        if (run != NULL) {
            PostPool_Set_Mem_Thresh(run, sub_thresh);
            // Runs that already have a lexicon were flipped earlier.
            if (!PostPool_IVARS(run)->lexicon) {
                S_fresh_flip(run, ivars->lex_temp_in, ivars->post_temp_in);
            }
        }
    }

    ivars->flipped = true;
}
// Initialize a DefaultHighlightReader: locate the segment's highlight
// metadata (falling back to the legacy "term_vectors" key), verify the
// file format, and open the .ix/.dat instreams if present.  A missing
// .ix file means the segment has no highlight data.
DefaultHighlightReader*
DefHLReader_init(DefaultHighlightReader *self, Schema *schema,
                 Folder *folder, Snapshot *snapshot, VArray *segments,
                 int32_t seg_tick) {
    Segment *segment;
    Hash *metadata;
    HLReader_init((HighlightReader*)self, schema, folder, snapshot,
                  segments, seg_tick);
    segment = DefHLReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "highlight", 9);
    if (!metadata) {
        // Older indexes stored the data under "term_vectors".
        metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "term_vectors",
                                                 12);
    }

    // Check format.
    if (metadata) {
        Obj *format = Hash_Fetch_Str(metadata, "format", 6);
        if (!format) { THROW(ERR, "Missing 'format' var"); }
        else {
            if (Obj_To_I64(format) != HLWriter_current_file_format) {
                THROW(ERR, "Unsupported highlight data format: %i64",
                      Obj_To_I64(format));
            }
        }
    }

    // Open instreams.
    {
        CharBuf *seg_name = Seg_Get_Name(segment);
        CharBuf *ix_file  = CB_newf("%o/highlight.ix", seg_name);
        CharBuf *dat_file = CB_newf("%o/highlight.dat", seg_name);
        if (Folder_Exists(folder, ix_file)) {
            self->ix_in = Folder_Open_In(folder, ix_file);
            if (!self->ix_in) {
                // Clean up self before rethrowing so nothing leaks.
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
            self->dat_in = Folder_Open_In(folder, dat_file);
            if (!self->dat_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
        }
        DECREF(ix_file);
        DECREF(dat_file);
    }

    return self;
}
// Initialize a DefaultDocReader: verify the "documents" metadata's file
// format and open the documents.ix/.dat instreams if present.  A missing
// .ix file means the segment simply stores no document data.
DefaultDocReader*
DefDocReader_init(DefaultDocReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, Vector *segments, int32_t seg_tick) {
    Hash *metadata;
    Segment *segment;
    DocReader_init((DocReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    DefaultDocReaderIVARS *const ivars = DefDocReader_IVARS(self);
    segment  = DefDocReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "documents", 9);

    if (metadata) {
        String *seg_name = Seg_Get_Name(segment);
        String *ix_file  = Str_newf("%o/documents.ix", seg_name);
        String *dat_file = Str_newf("%o/documents.dat", seg_name);
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);

        // Check format.
        if (!format) { THROW(ERR, "Missing 'format' var"); }
        else {
            int64_t format_val = Json_obj_to_i64(format);
            if (format_val < DocWriter_current_file_format) {
                THROW(ERR, "Obsolete doc storage format %i64; "
                      "Index regeneration is required", format_val);
            }
            else if (format_val != DocWriter_current_file_format) {
                THROW(ERR, "Unsupported doc storage format: %i64",
                      format_val);
            }
        }

        // Get streams.
        if (Folder_Exists(folder, ix_file)) {
            ivars->ix_in = Folder_Open_In(folder, ix_file);
            if (!ivars->ix_in) {
                // Clean up self before rethrowing so nothing leaks.
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
            ivars->dat_in = Folder_Open_In(folder, dat_file);
            if (!ivars->dat_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
        }
        DECREF(ix_file);
        DECREF(dat_file);
    }
    return self;
}
// Initialize a LexIndex for one field: open the lexicon .ixix (offsets)
// and .ix (index) files and grab a raw pointer to the offsets data via
// InStream_Buf so terms can be located by offset.
LexIndex*
LexIndex_init(LexIndex *self, Schema *schema, Folder *folder,
              Segment *segment, String *field) {
    int32_t field_num  = Seg_Field_Num(segment, field);
    String *seg_name   = Seg_Get_Name(segment);
    String *ixix_file  = Str_newf("%o/lexicon-%i32.ixix", seg_name,
                                  field_num);
    String *ix_file    = Str_newf("%o/lexicon-%i32.ix", seg_name,
                                  field_num);
    Architecture *arch = Schema_Get_Architecture(schema);

    // Init.
    Lex_init((Lexicon*)self, field);
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    ivars->tinfo = TInfo_new(0);
    ivars->tick  = 0;

    // Derive
    ivars->field_type = Schema_Fetch_Type(schema, field);
    if (!ivars->field_type) {
        // Clean up self before throwing so nothing leaks.
        String *mess = MAKE_MESS("Unknown field: '%o'", field);
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        Err_throw_mess(ERR, mess);
    }
    ivars->field_type = (FieldType*)INCREF(ivars->field_type);
    ivars->term_stepper = FType_Make_Term_Stepper(ivars->field_type);
    ivars->ixix_in = Folder_Open_In(folder, ixix_file);
    if (!ivars->ixix_in) {
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->ix_in = Folder_Open_In(folder, ix_file);
    if (!ivars->ix_in) {
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->index_interval = Arch_Index_Interval(arch);
    ivars->skip_interval  = Arch_Skip_Interval(arch);
    // One i64 offset per indexed term.
    ivars->size = (int32_t)(InStream_Length(ivars->ixix_in)
                            / sizeof(int64_t));
    ivars->offsets
        = (const int64_t*)InStream_Buf(ivars->ixix_in,
                                       (size_t)InStream_Length(ivars->ixix_in));
    DECREF(ixix_file);
    DECREF(ix_file);
    return self;
}
// When a segment is merged away, drop its highlight files from the
// snapshot so they are no longer retained.
void
HLWriter_delete_segment(HighlightWriter *self, SegReader *reader) {
    Snapshot *snapshot = HLWriter_Get_Snapshot(self);
    CharBuf *seg_name = Seg_Get_Name(SegReader_Get_Segment(reader));
    CharBuf *doomed[2];
    doomed[0] = CB_newf("%o/highlight.ix", seg_name);
    doomed[1] = CB_newf("%o/highlight.dat", seg_name);
    for (int i = 0; i < 2; i++) {
        Snapshot_Delete_Entry(snapshot, doomed[i]);
        DECREF(doomed[i]);
    }
}
// Verify that Seg_Get_Number reports the construction number and that
// the generated name for segment 35 is "seg_z", matching
// Seg_num_to_name(35).
static void
test_seg_name_and_num(TestBatchRunner *runner) {
    Segment *segment_z  = Seg_new(35);
    String  *seg_z_name = Seg_num_to_name(35);
    TEST_TRUE(runner, Seg_Get_Number(segment_z) == INT64_C(35),
              "Get_Number");
    TEST_TRUE(runner, Str_Equals_Utf8(Seg_Get_Name(segment_z), "seg_z", 5),
              "Get_Name");
    TEST_TRUE(runner, Str_Equals_Utf8(seg_z_name, "seg_z", 5),
              "num_to_name");
    DECREF(seg_z_name);
    DECREF(segment_z);
}
// Initialize a DefaultHighlightReader: locate the segment's highlight
// metadata (falling back to the legacy "term_vectors" key), verify the
// file format, and open the .ix/.dat instreams if present.  A missing
// .ix file means the segment has no highlight data.
DefaultHighlightReader*
DefHLReader_init(DefaultHighlightReader *self, Schema *schema,
                 Folder *folder, Snapshot *snapshot, Vector *segments,
                 int32_t seg_tick) {
    HLReader_init((HighlightReader*)self, schema, folder, snapshot,
                  segments, seg_tick);
    DefaultHighlightReaderIVARS *const ivars = DefHLReader_IVARS(self);
    Segment *segment = DefHLReader_Get_Segment(self);
    Hash *metadata
        = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "highlight", 9);
    if (!metadata) {
        // Older indexes stored the data under "term_vectors".
        metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "term_vectors",
                                                  12);
    }

    // Check format.
    if (metadata) {
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);
        if (!format) { THROW(ERR, "Missing 'format' var"); }
        else {
            if (Json_obj_to_i64(format) != HLWriter_current_file_format) {
                THROW(ERR, "Unsupported highlight data format: %i64",
                      Json_obj_to_i64(format));
            }
        }
    }

    // Open instreams.
    String *seg_name = Seg_Get_Name(segment);
    String *ix_file  = Str_newf("%o/highlight.ix", seg_name);
    String *dat_file = Str_newf("%o/highlight.dat", seg_name);
    if (Folder_Exists(folder, ix_file)) {
        ivars->ix_in = Folder_Open_In(folder, ix_file);
        if (!ivars->ix_in) {
            // Clean up self before rethrowing so nothing leaks.
            Err *error = (Err*)INCREF(Err_get_error());
            DECREF(ix_file);
            DECREF(dat_file);
            DECREF(self);
            RETHROW(error);
        }
        ivars->dat_in = Folder_Open_In(folder, dat_file);
        if (!ivars->dat_in) {
            Err *error = (Err*)INCREF(Err_get_error());
            DECREF(ix_file);
            DECREF(dat_file);
            DECREF(self);
            RETHROW(error);
        }
    }
    DECREF(ix_file);
    DECREF(dat_file);
    return self;
}
/* Verify that Seg_Get_Number reports the construction number and that
 * the generated name for segment 35 is "seg_z", matching
 * Seg_num_to_name(35). */
static void
test_seg_name_and_num(TestBatch *batch) {
    Segment *segment_z  = Seg_new(35);
    CharBuf *seg_z_name = Seg_num_to_name(35);
    TEST_TRUE(batch, Seg_Get_Number(segment_z) == I64_C(35), "Get_Number");
    TEST_TRUE(batch, CB_Equals_Str(Seg_Get_Name(segment_z), "seg_z", 5),
              "Get_Name");
    TEST_TRUE(batch, CB_Equals_Str(seg_z_name, "seg_z", 5), "num_to_name");
    DECREF(seg_z_name);
    DECREF(segment_z);
}
// Initialize a MatchPostingWriter and open this field's postings data
// file inside the segment directory.
MatchPostingWriter*
MatchPostWriter_init(MatchPostingWriter *self, Schema *schema,
                     Snapshot *snapshot, Segment *segment,
                     PolyReader *polyreader, int32_t field_num) {
    Folder *folder = PolyReader_Get_Folder(polyreader);
    CharBuf *path = CB_newf("%o/postings-%i32.dat", Seg_Get_Name(segment),
                            field_num);
    PostWriter_init((PostingWriter*)self, schema, snapshot, segment,
                    polyreader, field_num);
    self->outstream = Folder_Open_Out(folder, path);
    if (!self->outstream) { RETHROW(INCREF(Err_get_error())); }
    DECREF(path);
    return self;
}
/* Initialize a DefaultDocReader: verify the "documents" metadata's file
 * format and open the documents.ix/.dat instreams if present.  A missing
 * .ix file means the segment simply stores no document data. */
DefaultDocReader*
DefDocReader_init(DefaultDocReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, VArray *segments, i32_t seg_tick) {
    Hash *metadata;
    Segment *segment;
    DocReader_init((DocReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    segment  = DefDocReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "documents", 9);
    if (metadata) {
        CharBuf *seg_name = Seg_Get_Name(segment);
        CharBuf *ix_file  = CB_newf("%o/documents.ix", seg_name);
        CharBuf *dat_file = CB_newf("%o/documents.dat", seg_name);
        Obj *format = Hash_Fetch_Str(metadata, "format", 6);

        /* Check format. */
        if (!format) { THROW("Missing 'format' var"); }
        else {
            i64_t format_val = Obj_To_I64(format);
            if (format_val < DocWriter_current_file_format) {
                THROW("Obsolete doc storage format %i64; "
                      "Index regeneration is required", format_val);
            }
            else if (format_val != DocWriter_current_file_format) {
                THROW("Unsupported doc storage format: %i64", format_val);
            }
        }

        /* Get streams. */
        if (Folder_Exists(folder, ix_file)) {
            self->ix_in  = Folder_Open_In(folder, ix_file);
            self->dat_in = Folder_Open_In(folder, dat_file);
            if (!self->ix_in || !self->dat_in) {
                /* Clean up self before throwing so nothing leaks. */
                CharBuf *mess = MAKE_MESS("Can't open either %o or %o",
                                          ix_file, dat_file);
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                Err_throw_mess(mess);
            }
        }
        DECREF(ix_file);
        DECREF(dat_file);
    }
    return self;
}
// Switch the SortFieldWriter from gathering mode to fetching mode.
// Either sorts the in-memory buffer (nothing was flushed) or opens the
// temp instreams written during earlier flushes and hands each run its
// streams plus a slice of the memory budget.  Throws if called twice.
//
// Fix: the original assigned `ivars->flipped = true;` a second time at
// the end of the function; the flag is already set right after the
// double-flip guard, so the duplicate assignment was dead code and has
// been removed.
void
SortFieldWriter_Flip_IMP(SortFieldWriter *self) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    uint32_t num_items = SortFieldWriter_Buffer_Count(self);
    uint32_t num_runs  = Vec_Get_Size(ivars->runs);

    // Flip is a one-shot transition; a second call is a logic error.
    if (ivars->flipped) { THROW(ERR, "Can't call Flip() twice"); }
    ivars->flipped = true;

    // Sanity check: data is either all still buffered or all flushed to
    // runs -- never both.
    if (num_runs && num_items) {
        THROW(ERR, "Sanity check failed: num_runs: %u32 num_items: %u32",
              num_runs, num_items);
    }

    if (num_items) {
        SortFieldWriter_Sort_Buffer(self);
    }
    else if (num_runs) {
        // Open the temp files that earlier flushes wrote.
        Folder *folder = PolyReader_Get_Folder(ivars->polyreader);
        String *seg_name = Seg_Get_Name(ivars->segment);
        String *ord_path = Str_newf("%o/sort_ord_temp", seg_name);
        ivars->ord_in = Folder_Open_In(folder, ord_path);
        DECREF(ord_path);
        if (!ivars->ord_in) { RETHROW(INCREF(Err_get_error())); }
        if (ivars->var_width) {
            // Only variable-width data needs the auxiliary index file.
            String *ix_path = Str_newf("%o/sort_ix_temp", seg_name);
            ivars->ix_in = Folder_Open_In(folder, ix_path);
            DECREF(ix_path);
            if (!ivars->ix_in) { RETHROW(INCREF(Err_get_error())); }
        }
        String *dat_path = Str_newf("%o/sort_dat_temp", seg_name);
        ivars->dat_in = Folder_Open_In(folder, dat_path);
        DECREF(dat_path);
        if (!ivars->dat_in) { RETHROW(INCREF(Err_get_error())); }

        // Assign streams and a slice of mem_thresh to each run, with a
        // 64 KiB floor so tiny slices don't thrash.
        size_t sub_thresh = ivars->mem_thresh / num_runs;
        if (sub_thresh < 65536) { sub_thresh = 65536; }
        for (uint32_t i = 0; i < num_runs; i++) {
            SortFieldWriter *run
                = (SortFieldWriter*)Vec_Fetch(ivars->runs, i);
            S_flip_run(run, sub_thresh, ivars->ord_in, ivars->ix_in,
                       ivars->dat_in);
        }
    }
}
/* Initialize a LexIndex over one field's lexicon: open the .ixix
 * (offsets) and .ix (index) files, obtain raw buffers for both via
 * InStream_Buf, and record interval sizes from the Architecture. */
LexIndex*
LexIndex_init(LexIndex *self, Schema *schema, Folder *folder,
              Segment *segment, const CharBuf *field) {
    i32_t    field_num = Seg_Field_Num(segment, field);
    CharBuf *seg_name  = Seg_Get_Name(segment);
    CharBuf *ixix_file = CB_newf("%o/lexicon-%i32.ixix", seg_name,
                                 field_num);
    CharBuf *ix_file   = CB_newf("%o/lexicon-%i32.ix", seg_name,
                                 field_num);
    Architecture *arch = Schema_Get_Architecture(schema);

    /* Init. */
    self->term  = ViewCB_new_from_trusted_utf8(NULL, 0);
    self->tinfo = TInfo_new(0,0,0,0);
    self->tick  = 0;

    /* Derive */
    self->field_type = Schema_Fetch_Type(schema, field);
    if (!self->field_type) {
        /* Clean up self before throwing so nothing leaks. */
        CharBuf *mess = MAKE_MESS("Unknown field: '%o'", field);
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        Err_throw_mess(mess);
    }
    INCREF(self->field_type);
    self->ixix_in = Folder_Open_In(folder, ixix_file);
    self->ix_in   = Folder_Open_In(folder, ix_file);
    if (!self->ixix_in || !self->ix_in) {
        CharBuf *mess = MAKE_MESS("Can't open either %o or %o", ix_file,
                                  ixix_file);
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        Err_throw_mess(mess);
    }
    self->index_interval = Arch_Index_Interval(arch);
    self->skip_interval  = Arch_Skip_Interval(arch);
    /* One i64 offset per indexed term. */
    self->size = (i32_t)(InStream_Length(self->ixix_in) / sizeof(i64_t));
    self->offsets
        = (i64_t*)InStream_Buf(self->ixix_in,
                               (size_t)InStream_Length(self->ixix_in));
    self->data  = InStream_Buf(self->ix_in, InStream_Length(self->ix_in));
    self->limit = self->data + InStream_Length(self->ix_in);
    DECREF(ixix_file);
    DECREF(ix_file);
    return self;
}
// Delete a merged-away segment: let every sub-writer (and the deletions
// writer) clean up its files for that segment, then drop the segment
// directory from the snapshot.
void
SegWriter_Delete_Segment_IMP(SegWriter *self, SegReader *reader) {
    SegWriterIVARS *const ivars = SegWriter_IVARS(self);
    String *doomed_seg = Seg_Get_Name(SegReader_Get_Segment(reader));

    // Have all the sub-writers delete the segment.
    size_t num_writers = Vec_Get_Size(ivars->writers);
    for (size_t tick = 0; tick < num_writers; tick++) {
        DataWriter *sub_writer
            = (DataWriter*)Vec_Fetch(ivars->writers, tick);
        DataWriter_Delete_Segment(sub_writer, reader);
    }
    DelWriter_Delete_Segment(ivars->del_writer, reader);

    // Remove seg directory from snapshot.
    Snapshot_Delete_Entry(SegWriter_Get_Snapshot(self), doomed_seg);
}
// When merging a segment away, preserve its recorded deletions: for each
// deletions file listed in the doomed segment's metadata, find the
// target segment it masks among our current readers and, if the deletion
// count is unchanged, mark that tick as updated so a replacement file
// gets written.
void
DefDelWriter_Merge_Segment_IMP(DefaultDeletionsWriter *self,
                               SegReader *reader, I32Array *doc_map) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    UNUSED_VAR(doc_map);
    Segment *segment = SegReader_Get_Segment(reader);
    Hash *del_meta
        = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "deletions", 9);

    if (del_meta) {
        Vector *seg_readers = ivars->seg_readers;
        Hash *files = (Hash*)Hash_Fetch_Utf8(del_meta, "files", 5);
        if (files) {
            HashIterator *iter = HashIter_new(files);
            while (HashIter_Next(iter)) {
                String *seg       = HashIter_Get_Key(iter);
                Hash   *mini_meta = (Hash*)HashIter_Get_Value(iter);

                /* Find the segment the deletions from the SegReader
                 * we're adding correspond to.  If it's gone, we don't
                 * need to worry about losing deletions files that point
                 * at it. */
                for (size_t i = 0, max = Vec_Get_Size(seg_readers);
                     i < max; i++) {
                    SegReader *candidate
                        = (SegReader*)Vec_Fetch(seg_readers, i);
                    String *candidate_name
                        = Seg_Get_Name(SegReader_Get_Segment(candidate));

                    if (Str_Equals(seg, (Obj*)candidate_name)) {
                        /* If the count hasn't changed, we're about to
                         * merge away the most recent deletions file
                         * pointing at this target segment -- so force a
                         * new file to be written out. */
                        int32_t count = (int32_t)Json_obj_to_i64(
                                            Hash_Fetch_Utf8(mini_meta,
                                                            "count", 5));
                        DeletionsReader *del_reader
                            = (DeletionsReader*)SegReader_Obtain(
                                  candidate,
                                  Class_Get_Name(DELETIONSREADER));
                        if (count == DelReader_Del_Count(del_reader)) {
                            ivars->updated[i] = true;
                        }
                        break;
                    }
                }
            }
            DECREF(iter);
        }
    }
}
/* When merging a segment away, preserve its recorded deletions: for each
 * deletions file listed in the doomed segment's metadata, find the
 * target segment it masks among our current readers and, if the deletion
 * count is unchanged, mark that tick as updated so a replacement file
 * gets written. */
void
DefDelWriter_merge_segment(DefaultDeletionsWriter *self, SegReader *reader,
                           I32Array *doc_map) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    UNUSED_VAR(doc_map);
    Segment *segment = SegReader_Get_Segment(reader);
    Hash *del_meta = (Hash*)Seg_Fetch_Metadata_Str(segment, "deletions", 9);

    if (del_meta) {
        VArray *seg_readers = ivars->seg_readers;
        Hash *files = (Hash*)Hash_Fetch_Str(del_meta, "files", 5);
        if (files) {
            CharBuf *seg;
            Hash    *mini_meta;
            Hash_Iterate(files);
            while (Hash_Next(files, (Obj**)&seg, (Obj**)&mini_meta)) {

                /* Find the segment the deletions from the SegReader
                 * we're adding correspond to.  If it's gone, we don't
                 * need to worry about losing deletions files that point
                 * at it. */
                for (uint32_t i = 0, max = VA_Get_Size(seg_readers);
                     i < max; i++) {
                    SegReader *candidate
                        = (SegReader*)VA_Fetch(seg_readers, i);
                    CharBuf *candidate_name
                        = Seg_Get_Name(SegReader_Get_Segment(candidate));

                    if (CB_Equals(seg, (Obj*)candidate_name)) {
                        /* If the count hasn't changed, we're about to
                         * merge away the most recent deletions file
                         * pointing at this target segment -- so force a
                         * new file to be written out. */
                        int32_t count = (int32_t)Obj_To_I64(
                                            Hash_Fetch_Str(mini_meta,
                                                           "count", 5));
                        DeletionsReader *del_reader
                            = (DeletionsReader*)SegReader_Obtain(
                                  candidate,
                                  VTable_Get_Name(DELETIONSREADER));
                        if (count == DelReader_Del_Count(del_reader)) {
                            ivars->updated[i] = true;
                        }
                        break;
                    }
                }
            }
        }
    }
}
// Indicate whether it is safe to build a SegLexicon using the given // parameters. Will return false if the field is not indexed or if no terms // are present for this field in this segment. static bool S_has_data(Schema *schema, Folder *folder, Segment *segment, String *field) { FieldType *type = Schema_Fetch_Type(schema, field); if (!type || !FType_Indexed(type)) { // If the field isn't indexed, bail out. return false; } else { // Bail out if there are no terms for this field in this segment. int32_t field_num = Seg_Field_Num(segment, field); String *seg_name = Seg_Get_Name(segment); String *file = Str_newf("%o/lexicon-%i32.dat", seg_name, field_num); bool retval = Folder_Exists(folder, file); DECREF(file); return retval; } }
// Prepare a fresh directory for the segment being written: remove any
// stale leftovers from a crashed indexing session, then create the dir.
void
SegWriter_Prep_Seg_Dir_IMP(SegWriter *self) {
    SegWriterIVARS *const ivars = SegWriter_IVARS(self);
    Folder *folder = SegWriter_Get_Folder(self);
    String *seg_name = Seg_Get_Name(ivars->segment);

    // Clear stale segment files from crashed indexing sessions.
    if (Folder_Exists(folder, seg_name)
        && !Folder_Delete_Tree(folder, seg_name)
       ) {
        THROW(ERR, "Couldn't completely remove '%o'", seg_name);
    }

    // Create the segment directory.
    if (!Folder_MkDir(folder, seg_name)) {
        RETHROW(INCREF(Err_get_error()));
    }
}
/* Remove the merged segment's lexicon files from the snapshot.  Entries
 * selected by the S_my_file predicate against the "<seg_name>/lexicon"
 * pattern are deleted. */
void
LexWriter_delete_segment(LexiconWriter *self, SegReader *reader) {
    Snapshot *snapshot = LexWriter_Get_Snapshot(self);
    CharBuf *merged_seg_name = Seg_Get_Name(SegReader_Get_Segment(reader));
    CharBuf *pattern = CB_newf("%o/lexicon", merged_seg_name);
    VArray  *files = Snapshot_List(snapshot);
    VArray  *my_old_files = VA_Grep(files, S_my_file, pattern);
    u32_t i, max;

    for (i = 0, max = VA_Get_Size(my_old_files); i < max; i++) {
        CharBuf *entry = (CharBuf*)VA_Fetch(my_old_files, i);
        Snapshot_Delete_Entry(snapshot, entry);
    }

    DECREF(my_old_files);
    DECREF(files);
    DECREF(pattern);
}
// Fetch the SortFieldWriter for `field_num`, creating it on first use.
// Also lazily opens the three shared temp outstreams (ord/ix/dat) that
// all field writers append to.
static SortFieldWriter*
S_lazy_init_field_writer(SortWriter *self, int32_t field_num) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    SortFieldWriter *field_writer
        = (SortFieldWriter*)VA_Fetch(ivars->field_writers, field_num);
    if (!field_writer) {

        // Open temp files.
        if (!ivars->temp_ord_out) {
            Folder *folder = ivars->folder;
            CharBuf *seg_name = Seg_Get_Name(ivars->segment);
            CharBuf *path = CB_newf("%o/sort_ord_temp", seg_name);
            ivars->temp_ord_out = Folder_Open_Out(folder, path);
            if (!ivars->temp_ord_out) {
                DECREF(path);
                RETHROW(INCREF(Err_get_error()));
            }
            // Reuse `path` for the remaining two temp files.
            CB_setf(path, "%o/sort_ix_temp", seg_name);
            ivars->temp_ix_out = Folder_Open_Out(folder, path);
            if (!ivars->temp_ix_out) {
                DECREF(path);
                RETHROW(INCREF(Err_get_error()));
            }
            CB_setf(path, "%o/sort_dat_temp", seg_name);
            ivars->temp_dat_out = Folder_Open_Out(folder, path);
            if (!ivars->temp_dat_out) {
                DECREF(path);
                RETHROW(INCREF(Err_get_error()));
            }
            DECREF(path);
        }

        CharBuf *field = Seg_Field_Name(ivars->segment, field_num);
        field_writer
            = SortFieldWriter_new(ivars->schema, ivars->snapshot,
                                  ivars->segment, ivars->polyreader,
                                  field, ivars->mem_pool,
                                  ivars->mem_thresh, ivars->temp_ord_out,
                                  ivars->temp_ix_out, ivars->temp_dat_out);
        VA_Store(ivars->field_writers, field_num, (Obj*)field_writer);
    }
    return field_writer;
}