/**
 * Flip the pool: prepare it and the runs it holds for reading.
 *
 * When at least one run exists, the segment's "lextemp" and "ptemp" files
 * are opened for input and stored in the instance variables.  The in-memory
 * buffer is then sorted; if runs exist and the buffer still holds items,
 * those items are handed to a freshly created run which is added to this
 * pool.  Finally every run that was present on entry gets an equal share of
 * mem_thresh and, if it has no lexicon yet, is passed to S_fresh_flip()
 * together with the temp input streams.
 *
 * Rethrows the stored error if either temp file cannot be opened.
 *
 * @param self The PostingPool to flip.
 */
void
PostPool_Flip_IMP(PostingPool *self) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    uint32_t num_runs   = VA_Get_Size(ivars->runs);
    uint32_t sub_thresh = num_runs > 0
                          ? ivars->mem_thresh / num_runs
                          : ivars->mem_thresh;

    if (num_runs) {
        Folder *folder   = PolyReader_Get_Folder(ivars->polyreader);
        String *seg_name = Seg_Get_Name(ivars->segment);

        // Each path is released as soon as its open attempt returns, so
        // that nothing is leaked when RETHROW unwinds out of this function.
        String *lex_temp_path = Str_newf("%o/lextemp", seg_name);
        ivars->lex_temp_in = Folder_Open_In(folder, lex_temp_path);
        DECREF(lex_temp_path);
        if (!ivars->lex_temp_in) {
            RETHROW(INCREF(Err_get_error()));
        }

        String *post_temp_path = Str_newf("%o/ptemp", seg_name);
        ivars->post_temp_in = Folder_Open_In(folder, post_temp_path);
        DECREF(post_temp_path);
        if (!ivars->post_temp_in) {
            RETHROW(INCREF(Err_get_error()));
        }
    }

    PostPool_Sort_Buffer(self);
    if (num_runs && (ivars->buf_max - ivars->buf_tick) > 0) {
        uint32_t num_items = PostPool_Buffer_Count(self);
        // Cheap imitation of flush. FIXME.
        PostingPool *run
            = PostPool_new(ivars->schema, ivars->snapshot, ivars->segment,
                           ivars->polyreader, ivars->field, ivars->lex_writer,
                           ivars->mem_pool, ivars->lex_temp_out,
                           ivars->post_temp_out, ivars->skip_out);
        PostPool_Grow_Buffer(run, num_items);
        PostingPoolIVARS *const run_ivars = PostPool_IVARS(run);

        // Hand the unread portion of our buffer over to the new run and
        // mark our own buffer as empty.
        memcpy(run_ivars->buffer, (ivars->buffer) + ivars->buf_tick,
               num_items * sizeof(Obj*));
        run_ivars->buf_max = num_items;
        PostPool_Add_Run(self, (SortExternal*)run);
        ivars->buf_tick = 0;
        ivars->buf_max  = 0;
    }

    // Assign.  Note that num_runs was captured on entry, so the loop covers
    // only the runs that existed before the buffer was handed off above.
    for (uint32_t i = 0; i < num_runs; i++) {
        PostingPool *run = (PostingPool*)VA_Fetch(ivars->runs, i);
        if (run != NULL) {
            PostPool_Set_Mem_Thresh(run, sub_thresh);
            if (!PostPool_IVARS(run)->lexicon) {
                S_fresh_flip(run, ivars->lex_temp_in, ivars->post_temp_in);
            }
        }
    }

    ivars->flipped = true;
}
/**
 * Initialize the shared DataWriter state.
 *
 * Stores a new reference to each of the supplied schema, snapshot, segment
 * and polyreader, plus a new reference to the folder reported by the
 * polyreader, then runs the abstract-class check for DATAWRITER.
 *
 * @param self       Object being initialized.
 * @param schema     Schema to retain.
 * @param snapshot   Snapshot to retain.
 * @param segment    Segment to retain.
 * @param polyreader PolyReader to retain; also the source of the folder.
 * @return self.
 */
DataWriter*
DataWriter_init(DataWriter *self, Schema *schema, Snapshot *snapshot,
                Segment *segment, PolyReader *polyreader) {
    DataWriterIVARS *const ivars = DataWriter_IVARS(self);

    // Retain the objects handed in by the caller.
    ivars->schema     = (Schema*)INCREF(schema);
    ivars->snapshot   = (Snapshot*)INCREF(snapshot);
    ivars->segment    = (Segment*)INCREF(segment);
    ivars->polyreader = (PolyReader*)INCREF(polyreader);

    // The folder is not passed in directly; it comes from the polyreader.
    Folder *reader_folder = PolyReader_Get_Folder(polyreader);
    ivars->folder = (Folder*)INCREF(reader_folder);

    ABSTRACT_CLASS_CHECK(self, DATAWRITER);
    return self;
}
/**
 * Initialize a MatchPostingWriter.
 *
 * Runs the parent PostWriter_init(), then opens the output stream
 * "<segment name>/postings-<field_num>.dat" in the polyreader's folder and
 * stores it in self->outstream.
 *
 * Rethrows the stored error if the output file cannot be opened.
 *
 * @param self       Object being initialized.
 * @param schema     Passed through to PostWriter_init().
 * @param snapshot   Passed through to PostWriter_init().
 * @param segment    Supplies the segment name used in the file path.
 * @param polyreader Supplies the folder the file is opened in.
 * @param field_num  Field number embedded in the file name.
 * @return self.
 */
MatchPostingWriter*
MatchPostWriter_init(MatchPostingWriter *self, Schema *schema,
                     Snapshot *snapshot, Segment *segment,
                     PolyReader *polyreader, int32_t field_num) {
    Folder  *folder   = PolyReader_Get_Folder(polyreader);
    CharBuf *filename = CB_newf("%o/postings-%i32.dat",
                                Seg_Get_Name(segment), field_num);
    PostWriter_init((PostingWriter*)self, schema, snapshot, segment,
                    polyreader, field_num);
    self->outstream = Folder_Open_Out(folder, filename);
    // Release the path before checking the result so that it is not leaked
    // when RETHROW unwinds out of this function.
    DECREF(filename);
    if (!self->outstream) {
        RETHROW(INCREF(Err_get_error()));
    }
    return self;
}
/**
 * Flip the writer: prepare it for reading back its sorted contents.
 *
 * Throws if called a second time, or if the writer holds both buffered
 * items and runs at once.  With buffered items only, the buffer is sorted.
 * With runs only, the segment's temporary "sort_ord_temp", "sort_ix_temp"
 * (variable-width fields only) and "sort_dat_temp" files are opened for
 * input, and every run is handed those streams plus an equal share of
 * mem_thresh (never less than 65536) via S_flip_run().
 *
 * @param self The SortFieldWriter to flip.
 */
void
SortFieldWriter_Flip_IMP(SortFieldWriter *self) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    uint32_t buffered  = SortFieldWriter_Buffer_Count(self);
    uint32_t run_count = Vec_Get_Size(ivars->runs);

    if (ivars->flipped) {
        THROW(ERR, "Can't call Flip() twice");
    }
    ivars->flipped = true;

    // Sanity check: buffered items and runs must not coexist here.
    if (buffered != 0 && run_count != 0) {
        THROW(ERR, "Sanity check failed: num_runs: %u32 num_items: %u32",
              run_count, buffered);
    }

    if (buffered != 0) {
        // Everything is still in memory; sorting the buffer is enough.
        SortFieldWriter_Sort_Buffer(self);
    }
    else if (run_count != 0) {
        Folder *folder   = PolyReader_Get_Folder(ivars->polyreader);
        String *seg_name = Seg_Get_Name(ivars->segment);

        // Ordinals.
        String *ord_path = Str_newf("%o/sort_ord_temp", seg_name);
        ivars->ord_in = Folder_Open_In(folder, ord_path);
        DECREF(ord_path);
        if (ivars->ord_in == NULL) {
            RETHROW(INCREF(Err_get_error()));
        }

        // Index file, opened only for variable-width fields.
        if (ivars->var_width) {
            String *ix_path = Str_newf("%o/sort_ix_temp", seg_name);
            ivars->ix_in = Folder_Open_In(folder, ix_path);
            DECREF(ix_path);
            if (ivars->ix_in == NULL) {
                RETHROW(INCREF(Err_get_error()));
            }
        }

        // Data.
        String *dat_path = Str_newf("%o/sort_dat_temp", seg_name);
        ivars->dat_in = Folder_Open_In(folder, dat_path);
        DECREF(dat_path);
        if (ivars->dat_in == NULL) {
            RETHROW(INCREF(Err_get_error()));
        }

        // Assign streams and a slice of mem_thresh, with a floor of 65536.
        size_t sub_thresh = ivars->mem_thresh / run_count;
        sub_thresh = sub_thresh < 65536 ? 65536 : sub_thresh;

        uint32_t tick = 0;
        while (tick < run_count) {
            SortFieldWriter *run
                = (SortFieldWriter*)Vec_Fetch(ivars->runs, tick);
            S_flip_run(run, sub_thresh, ivars->ord_in, ivars->ix_in,
                       ivars->dat_in);
            tick++;
        }
    }

    // Same value as the store near the top of the function.
    ivars->flipped = true;
}
/**
 * Write this field's final sort files into the segment.
 *
 * Returns 0 without touching the folder when Peek() yields nothing.
 * Otherwise opens "<segment>/sort-<field_num>.ord", the matching ".ix"
 * (variable-width fields only) and ".dat" output streams, passes them to
 * S_write_files(), then closes and releases the streams.
 *
 * Rethrows the stored error if any output file cannot be opened.
 *
 * @param self The SortFieldWriter to finish.
 * @return The value returned by S_write_files(), or 0 if there was no data.
 */
int32_t
SortFieldWriter_Finish_IMP(SortFieldWriter *self) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);

    // Nothing to write: bail out before creating any files.
    if (!SortFieldWriter_Peek(self)) {
        return 0;
    }

    int32_t  field_num = ivars->field_num;
    Folder  *folder    = PolyReader_Get_Folder(ivars->polyreader);
    String  *seg_name  = Seg_Get_Name(ivars->segment);

    // Ordinals stream.
    String *ord_file = Str_newf("%o/sort-%i32.ord", seg_name, field_num);
    OutStream *ord_stream = Folder_Open_Out(folder, ord_file);
    DECREF(ord_file);
    if (ord_stream == NULL) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Index stream; stays NULL for fixed-width fields.
    OutStream *ix_stream = NULL;
    if (ivars->var_width) {
        String *ix_file = Str_newf("%o/sort-%i32.ix", seg_name, field_num);
        ix_stream = Folder_Open_Out(folder, ix_file);
        DECREF(ix_file);
        if (ix_stream == NULL) {
            RETHROW(INCREF(Err_get_error()));
        }
    }

    // Data stream.
    String *dat_file = Str_newf("%o/sort-%i32.dat", seg_name, field_num);
    OutStream *dat_stream = Folder_Open_Out(folder, dat_file);
    DECREF(dat_file);
    if (dat_stream == NULL) {
        RETHROW(INCREF(Err_get_error()));
    }

    int32_t result = S_write_files(self, ord_stream, ix_stream, dat_stream);

    // Close whatever was opened, then drop our references.
    OutStream_Close(ord_stream);
    if (ix_stream != NULL) {
        OutStream_Close(ix_stream);
    }
    OutStream_Close(dat_stream);

    DECREF(dat_stream);
    DECREF(ix_stream);
    DECREF(ord_stream);

    return result;
}
void S_try_open_elements(void *context) { struct try_open_elements_context *args = (struct try_open_elements_context*)context; PolyReader *self = args->self; PolyReaderIVARS *const ivars = PolyReader_IVARS(self); VArray *files = Snapshot_List(ivars->snapshot); Folder *folder = PolyReader_Get_Folder(self); uint32_t num_segs = 0; uint64_t latest_schema_gen = 0; CharBuf *schema_file = NULL; // Find schema file, count segments. for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) { CharBuf *entry = (CharBuf*)VA_Fetch(files, i); if (Seg_valid_seg_name(entry)) { num_segs++; } else if (CB_Starts_With_Str(entry, "schema_", 7) && CB_Ends_With_Str(entry, ".json", 5) ) { uint64_t gen = IxFileNames_extract_gen(entry); if (gen > latest_schema_gen) { latest_schema_gen = gen; if (!schema_file) { schema_file = CB_Clone(entry); } else { CB_Mimic(schema_file, (Obj*)entry); } } } } // Read Schema. if (!schema_file) { DECREF(files); THROW(ERR, "Can't find a schema file."); } else { Hash *dump = (Hash*)Json_slurp_json(folder, schema_file); if (dump) { // read file successfully DECREF(ivars->schema); ivars->schema = (Schema*)CERTIFY( VTable_Load_Obj(SCHEMA, (Obj*)dump), SCHEMA); DECREF(dump); DECREF(schema_file); schema_file = NULL; } else { CharBuf *mess = MAKE_MESS("Failed to parse %o", schema_file); DECREF(schema_file); DECREF(files); Err_throw_mess(ERR, mess); } } VArray *segments = VA_new(num_segs); for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) { CharBuf *entry = (CharBuf*)VA_Fetch(files, i); // Create a Segment for each segmeta. if (Seg_valid_seg_name(entry)) { int64_t seg_num = IxFileNames_extract_gen(entry); Segment *segment = Seg_new(seg_num); // Bail if reading the file fails (probably because it's been // deleted and a new snapshot file has been written so we need to // retry). 
if (Seg_Read_File(segment, folder)) { VA_Push(segments, (Obj*)segment); } else { CharBuf *mess = MAKE_MESS("Failed to read %o", entry); DECREF(segment); DECREF(segments); DECREF(files); Err_throw_mess(ERR, mess); } } } // Sort the segments by age. VA_Sort(segments, NULL, NULL); // Open individual SegReaders. struct try_open_segreader_context seg_context; seg_context.schema = PolyReader_Get_Schema(self); seg_context.folder = folder; seg_context.snapshot = PolyReader_Get_Snapshot(self); seg_context.segments = segments; seg_context.result = NULL; args->seg_readers = VA_new(num_segs); Err *error = NULL; for (uint32_t seg_tick = 0; seg_tick < num_segs; seg_tick++) { seg_context.seg_tick = seg_tick; error = Err_trap(S_try_open_segreader, &seg_context); if (error) { break; } VA_Push(args->seg_readers, (Obj*)seg_context.result); seg_context.result = NULL; } DECREF(segments); DECREF(files); if (error) { DECREF(args->seg_readers); args->seg_readers = NULL; RETHROW(error); } }