// Exercise Segment's field registry: numbering through Add_Field and lookups
// in both directions through Field_Name and Field_Num.
static void
test_fields(TestBatch *batch) {
    Segment       *seg      = Seg_new(1);
    ZombieCharBuf *name_foo = ZCB_WRAP_STR("foo", 3);
    ZombieCharBuf *name_bar = ZCB_WRAP_STR("bar", 3);
    ZombieCharBuf *name_baz = ZCB_WRAP_STR("baz", 3);
    int32_t        assigned;

    // Adding a brand new field.
    assigned = Seg_Add_Field(seg, (CharBuf*)name_foo);
    TEST_TRUE(batch, assigned == 1,
              "Add_Field returns field number, and field numbers start at 1");

    // Adding another new field.
    assigned = Seg_Add_Field(seg, (CharBuf*)name_bar);
    TEST_TRUE(batch, assigned == 2, "add a second field");

    // Adding a field the segment has already seen.
    assigned = Seg_Add_Field(seg, (CharBuf*)name_foo);
    TEST_TRUE(batch, assigned == 1,
              "Add_Field returns existing field number if field is already known");

    // Number => name.
    TEST_TRUE(batch,
              ZCB_Equals(name_bar, (Obj*)Seg_Field_Name(seg, 2)),
              "Field_Name");
    TEST_TRUE(batch,
              Seg_Field_Name(seg, 3) == NULL,
              "Field_Name returns NULL for unknown field number");

    // Name => number.  "baz" was never added.
    TEST_TRUE(batch,
              Seg_Field_Num(seg, (CharBuf*)name_bar) == 2,
              "Field_Num");
    TEST_TRUE(batch,
              Seg_Field_Num(seg, (CharBuf*)name_baz) == 0,
              "Field_Num returns 0 for unknown field name");

    DECREF(seg);
}
// Exercise Segment's field registry: numbering through Add_Field and lookups
// in both directions through Field_Name and Field_Num.
static void
test_fields(TestBatchRunner *runner) {
    Segment     *seg      = Seg_new(1);
    StackString *name_foo = SSTR_WRAP_UTF8("foo", 3);
    StackString *name_bar = SSTR_WRAP_UTF8("bar", 3);
    StackString *name_baz = SSTR_WRAP_UTF8("baz", 3);

    // Adding a brand new field.
    const int32_t first_num = Seg_Add_Field(seg, (String*)name_foo);
    TEST_TRUE(runner, first_num == 1,
              "Add_Field returns field number, and field numbers start at 1");

    // Adding another new field.
    const int32_t second_num = Seg_Add_Field(seg, (String*)name_bar);
    TEST_TRUE(runner, second_num == 2, "add a second field");

    // Adding a field the segment has already seen.
    const int32_t repeat_num = Seg_Add_Field(seg, (String*)name_foo);
    TEST_TRUE(runner, repeat_num == 1,
              "Add_Field returns existing field number if field is already known");

    // Number => name.
    TEST_TRUE(runner,
              SStr_Equals(name_bar, (Obj*)Seg_Field_Name(seg, 2)),
              "Field_Name");
    TEST_TRUE(runner,
              Seg_Field_Name(seg, 3) == NULL,
              "Field_Name returns NULL for unknown field number");

    // Name => number.  "baz" was never added.
    TEST_TRUE(runner,
              Seg_Field_Num(seg, (String*)name_bar) == 2,
              "Field_Num");
    TEST_TRUE(runner,
              Seg_Field_Num(seg, (String*)name_baz) == 0,
              "Field_Num returns 0 for unknown field name");

    DECREF(seg);
}
// Wrap up the lexicon for one field: record its term counts under the field's
// name, then close and drop the three per-field output streams and the term
// stepper.
void
LexWriter_finish_field(LexiconWriter *self, int32_t field_num) {
    CharBuf *const field_name = Seg_Field_Name(self->segment, field_num);
    Obj     *const key        = (Obj*)field_name;

    // Record term counts for this field, stringified, keyed by field name.
    Hash_Store(self->counts, key, (Obj*)CB_newf("%i32", self->count));
    Hash_Store(self->ix_counts, key, (Obj*)CB_newf("%i32", self->ix_count));

    // Close all three streams before releasing any of them.
    OutStream_Close(self->dat_out);
    OutStream_Close(self->ix_out);
    OutStream_Close(self->ixix_out);

    // Release the streams and clear the now-stale pointers.
    DECREF(self->dat_out);
    self->dat_out = NULL;
    DECREF(self->ix_out);
    self->ix_out = NULL;
    DECREF(self->ixix_out);
    self->ixix_out = NULL;

    // Discard the term stepper.
    DECREF(self->term_stepper);
    self->term_stepper = NULL;
}
// Begin writing the lexicon for one field: derive the three per-field file
// names, open an output stream for each, and reset the counters and steppers.
// Rethrows the stored error if any of the streams cannot be opened.
void
LexWriter_start_field(LexiconWriter *self, int32_t field_num) {
    Segment   *const seg        = LexWriter_Get_Segment(self);
    Folder    *const dir        = LexWriter_Get_Folder(self);
    Schema    *const field_defs = LexWriter_Get_Schema(self);
    CharBuf   *const seg_name   = Seg_Get_Name(seg);
    CharBuf   *const field_name = Seg_Field_Name(seg, field_num);
    FieldType *const field_type = Schema_Fetch_Type(field_defs, field_name);

    // Derive file names from the segment name and the field number.
    CB_setf(self->dat_file,  "%o/lexicon-%i32.dat",  seg_name, field_num);
    CB_setf(self->ix_file,   "%o/lexicon-%i32.ix",   seg_name, field_num);
    CB_setf(self->ixix_file, "%o/lexicon-%i32.ixix", seg_name, field_num);

    // Open the output streams, bailing out on the first failure.
    self->dat_out = Folder_Open_Out(dir, self->dat_file);
    if (self->dat_out == NULL) {
        RETHROW(INCREF(Err_get_error()));
    }
    self->ix_out = Folder_Open_Out(dir, self->ix_file);
    if (self->ix_out == NULL) {
        RETHROW(INCREF(Err_get_error()));
    }
    self->ixix_out = Folder_Open_Out(dir, self->ixix_file);
    if (self->ixix_out == NULL) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Start counting from zero, get a fresh term stepper for this field's
    // type, and reset the term info stepper.
    self->count        = 0;
    self->ix_count     = 0;
    self->term_stepper = FType_Make_Term_Stepper(field_type);
    TermStepper_Reset(self->tinfo_stepper);
}
// Return the SortFieldWriter stored at slot `field_num`, creating and storing
// it on first request.  The three shared temp output streams are opened the
// first time any field writer is needed; a failed open rethrows the stored
// error.
static SortFieldWriter*
S_lazy_init_field_writer(SortWriter *self, int32_t field_num) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    SortFieldWriter *writer
        = (SortFieldWriter*)VA_Fetch(ivars->field_writers, field_num);

    // Fast path: the writer already exists.
    if (writer != NULL) {
        return writer;
    }

    // Open temp files, if that hasn't happened yet.
    if (ivars->temp_ord_out == NULL) {
        Folder  *dir       = ivars->folder;
        CharBuf *seg_name  = Seg_Get_Name(ivars->segment);
        CharBuf *temp_path = CB_newf("%o/sort_ord_temp", seg_name);

        ivars->temp_ord_out = Folder_Open_Out(dir, temp_path);
        if (ivars->temp_ord_out == NULL) {
            DECREF(temp_path);
            RETHROW(INCREF(Err_get_error()));
        }

        // The same buffer is reused for the remaining two paths.
        CB_setf(temp_path, "%o/sort_ix_temp", seg_name);
        ivars->temp_ix_out = Folder_Open_Out(dir, temp_path);
        if (ivars->temp_ix_out == NULL) {
            DECREF(temp_path);
            RETHROW(INCREF(Err_get_error()));
        }

        CB_setf(temp_path, "%o/sort_dat_temp", seg_name);
        ivars->temp_dat_out = Folder_Open_Out(dir, temp_path);
        if (ivars->temp_dat_out == NULL) {
            DECREF(temp_path);
            RETHROW(INCREF(Err_get_error()));
        }

        DECREF(temp_path);
    }

    // Build the writer for this field and remember it.
    CharBuf *field_name = Seg_Field_Name(ivars->segment, field_num);
    writer = SortFieldWriter_new(ivars->schema, ivars->snapshot,
                                 ivars->segment, ivars->polyreader,
                                 field_name, ivars->mem_pool,
                                 ivars->mem_thresh, ivars->temp_ord_out,
                                 ivars->temp_ix_out, ivars->temp_dat_out);
    VA_Store(ivars->field_writers, field_num, (Obj*)writer);

    return writer;
}
// Return the SortFieldWriter stored at slot `field_num`, creating and storing
// it on first request.  The three shared temp output streams are opened the
// first time any field writer is needed; a failed open rethrows the stored
// error.
static SortFieldWriter*
S_lazy_init_field_writer(SortWriter *self, int32_t field_num) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    const size_t slot = (size_t)field_num;
    SortFieldWriter *writer
        = (SortFieldWriter*)Vec_Fetch(ivars->field_writers, slot);

    // Fast path: the writer already exists.
    if (writer != NULL) {
        return writer;
    }

    // Open temp files, if that hasn't happened yet.  Each path string is
    // released as soon as the open attempt has been made.
    if (ivars->temp_ord_out == NULL) {
        Folder *dir      = ivars->folder;
        String *seg_name = Seg_Get_Name(ivars->segment);

        String *ord_path = Str_newf("%o/sort_ord_temp", seg_name);
        ivars->temp_ord_out = Folder_Open_Out(dir, ord_path);
        DECREF(ord_path);
        if (ivars->temp_ord_out == NULL) {
            RETHROW(INCREF(Err_get_error()));
        }

        String *ix_path = Str_newf("%o/sort_ix_temp", seg_name);
        ivars->temp_ix_out = Folder_Open_Out(dir, ix_path);
        DECREF(ix_path);
        if (ivars->temp_ix_out == NULL) {
            RETHROW(INCREF(Err_get_error()));
        }

        String *dat_path = Str_newf("%o/sort_dat_temp", seg_name);
        ivars->temp_dat_out = Folder_Open_Out(dir, dat_path);
        DECREF(dat_path);
        if (ivars->temp_dat_out == NULL) {
            RETHROW(INCREF(Err_get_error()));
        }
    }

    // Build the writer for this field and remember it.
    String *field_name = Seg_Field_Name(ivars->segment, field_num);
    writer = SortFieldWriter_new(ivars->schema, ivars->snapshot,
                                 ivars->segment, ivars->polyreader,
                                 field_name, ivars->counter,
                                 ivars->mem_thresh, ivars->temp_ord_out,
                                 ivars->temp_ix_out, ivars->temp_dat_out);
    Vec_Store(ivars->field_writers, slot, (Obj*)writer);

    return writer;
}
// Initialize a DefaultLexiconReader.  After delegating common setup to
// LexReader_init, build an array of SegLexicon objects indexed by field
// number; a slot is only filled when the segment knows the field and
// S_has_data() reports data for it.  Returns self.
DefaultLexiconReader*
DefLexReader_init(DefaultLexiconReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, VArray *segments, int32_t seg_tick) {
    // Init.
    LexReader_init((LexiconReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    DefaultLexiconReaderIVARS *const ivars = DefLexReader_IVARS(self);
    Segment *seg = DefLexReader_Get_Segment(self);

    // Field numbers run from 1 through the schema's field count.
    const uint32_t num_fields = Schema_Num_Fields(schema);
    const uint32_t limit      = num_fields + 1;
    ivars->lexicons = VA_new(num_fields);

    for (uint32_t field_num = 1; field_num < limit; field_num++) {
        String *field = Seg_Field_Name(seg, field_num);
        if (field == NULL) {
            continue;
        }
        if (!S_has_data(schema, folder, seg, field)) {
            continue;
        }
        SegLexicon *seg_lex = SegLex_new(schema, folder, seg, field);
        VA_Store(ivars->lexicons, field_num, (Obj*)seg_lex);
    }

    return self;
}
/* Initialize a DefaultLexiconReader.  After delegating common setup to
 * LexReader_init, build an array of SegLexicon objects indexed by field
 * number; a slot is only filled when the segment knows the field and
 * S_has_data() reports data for it.  Returns self.
 */
DefaultLexiconReader*
DefLexReader_init(DefaultLexiconReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, VArray *segments, i32_t seg_tick) {
    Segment *seg;
    u32_t    field_num;
    u32_t    limit;

    /* Init. */
    LexReader_init((LexiconReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    seg = DefLexReader_Get_Segment(self);

    /* Build an array of SegLexicon objects.  Field numbers start at 1. */
    self->lexicons = VA_new(Schema_Num_Fields(schema));
    limit = Schema_Num_Fields(schema) + 1;
    for (field_num = 1; field_num < limit; field_num++) {
        CharBuf *field = Seg_Field_Name(seg, field_num);
        if (field == NULL) {
            continue;
        }
        if (S_has_data(schema, folder, seg, field)) {
            VA_Store(self->lexicons, field_num,
                     (Obj*)SegLex_new(schema, folder, seg, field));
        }
    }

    return self;
}
// Finish the sort data for this segment: optionally flush every field
// writer, close the temp streams, have each field writer flip and finish
// while recording its per-field metadata, store the "sort" metadata on the
// segment, and delete the temp files.  Does nothing if no temp stream was
// ever opened.
void
SortWriter_finish(SortWriter *self) {
    SortWriterIVARS *const ivars   = SortWriter_IVARS(self);
    VArray          *const writers = ivars->field_writers;

    // If we have no data, bail out.
    if (ivars->temp_ord_out == NULL) {
        return;
    }

    // If we've either flushed or added segments, flush everything so that any
    // one field can use the entire margin up to mem_thresh.
    if (ivars->flush_at_finish) {
        const uint32_t limit = VA_Get_Size(writers);
        for (uint32_t slot = 1; slot < limit; slot++) {
            SortFieldWriter *writer
                = (SortFieldWriter*)VA_Fetch(writers, slot);
            if (writer != NULL) {
                SortFieldWriter_Flush(writer);
            }
        }
    }

    // Close down temp streams.
    OutStream_Close(ivars->temp_ord_out);
    OutStream_Close(ivars->temp_ix_out);
    OutStream_Close(ivars->temp_dat_out);

    // Pull each field writer out of the array, let it finish, and record its
    // metadata keyed by field name.
    const uint32_t num_slots = VA_Get_Size(writers);
    for (uint32_t slot = 1; slot < num_slots; slot++) {
        SortFieldWriter *writer = (SortFieldWriter*)VA_Delete(writers, slot);
        if (writer == NULL) {
            continue;
        }

        CharBuf *field = Seg_Field_Name(ivars->segment, slot);
        Obj     *key   = (Obj*)field;

        SortFieldWriter_Flip(writer);
        int32_t count = SortFieldWriter_Finish(writer);
        Hash_Store(ivars->counts, key, (Obj*)CB_newf("%i32", count));

        // A null ord of -1 is not recorded.
        int32_t null_ord = SortFieldWriter_Get_Null_Ord(writer);
        if (null_ord != -1) {
            Hash_Store(ivars->null_ords, key,
                       (Obj*)CB_newf("%i32", null_ord));
        }

        int32_t ord_width = SortFieldWriter_Get_Ord_Width(writer);
        Hash_Store(ivars->ord_widths, key, (Obj*)CB_newf("%i32", ord_width));

        // VA_Delete handed the writer over to us; release it.
        DECREF(writer);
    }
    VA_Clear(writers);

    // Store metadata.
    Seg_Store_Metadata_Str(ivars->segment, "sort", 4,
                           (Obj*)SortWriter_Metadata(self));

    // Clean up the temp files, reusing one path buffer for all three.
    Folder  *dir       = ivars->folder;
    CharBuf *seg_name  = Seg_Get_Name(ivars->segment);
    CharBuf *temp_path = CB_newf("%o/sort_ord_temp", seg_name);
    Folder_Delete(dir, temp_path);
    CB_setf(temp_path, "%o/sort_ix_temp", seg_name);
    Folder_Delete(dir, temp_path);
    CB_setf(temp_path, "%o/sort_dat_temp", seg_name);
    Folder_Delete(dir, temp_path);
    DECREF(temp_path);
}
SortCache* SortCache_init(SortCache *self, Schema *schema, Folder *folder, Segment *segment, i32_t field_num) { CharBuf *field = Seg_Field_Name(segment, field_num); CharBuf *seg_name = Seg_Get_Name(segment); CharBuf *ord_file = CB_newf("%o/sort-%i32.ord", seg_name, field_num); CharBuf *ix_file = CB_newf("%o/sort-%i32.ix", seg_name, field_num); CharBuf *dat_file = CB_newf("%o/sort-%i32.dat", seg_name, field_num); i64_t ord_len, ix_len, dat_len; /* Derive. */ self->doc_max = Seg_Get_Count(segment); self->type = Schema_Fetch_Type(schema, field); if (!self->type || !FType_Sortable(self->type)) { THROW("'%o' isn't a sortable field", field); } /* Open instreams. */ self->ord_in = Folder_Open_In(folder, ord_file); self->ix_in = Folder_Open_In(folder, ix_file); self->dat_in = Folder_Open_In(folder, dat_file); if (!self->ix_in || !self->dat_in || !self->ord_in) { CharBuf *mess = MAKE_MESS("Can't open either %o, %o or %o", ord_file, ix_file, dat_file); DECREF(ord_file); DECREF(ix_file); DECREF(dat_file); Err_throw_mess(mess); } ord_len = InStream_Length(self->ord_in); ix_len = InStream_Length(self->ix_in); dat_len = InStream_Length(self->dat_in); /* Calculate the number of unique values and derive the ord bit width. */ self->num_uniq = (i32_t)(ix_len / 8) - 1; self->width = S_calc_width(self->num_uniq); /* Validate file lengths. */ { double bytes_per_doc = self->width / 8.0; double max_ords = ord_len / bytes_per_doc; if (max_ords < self->doc_max + 1) { THROW("Conflict between ord count max %f64 and doc_max %i32", max_ords, self->doc_max); } } /* Mmap ords, offsets and character data. */ self->ords = InStream_Buf(self->ord_in, (size_t)ord_len); self->offsets = (i64_t*)InStream_Buf(self->ix_in, (size_t)ix_len); self->char_data = InStream_Buf(self->dat_in, dat_len); { char *offs = (char*)self->offsets; self->offsets_limit = (i64_t*)(offs + ix_len); self->char_data_limit = self->char_data + dat_len; } DECREF(ord_file); DECREF(ix_file); DECREF(dat_file); return self; }
// Flip a single run from writing to reading.
//
// Marks the run as flipped (throwing if it already was), gives it a fresh
// Counter and `sub_thresh` as its memory threshold, and -- unless the run
// already has a SortCache -- reopens this run's slices of the shared temp
// streams and wraps them in a SortCache of the class matching the run's
// primitive type.  The ix stream is only reopened when the run is
// variable-width.
//
// Throws if the run has already been flipped, or if there is no SortCache
// class for the run's primitive type.
static void
S_flip_run(SortFieldWriter *run, size_t sub_thresh, InStream *ord_in,
           InStream *ix_in, InStream *dat_in) {
    SortFieldWriterIVARS *const run_ivars = SortFieldWriter_IVARS(run);

    if (run_ivars->flipped) {
        THROW(ERR, "Can't Flip twice");
    }
    run_ivars->flipped = true;

    // Get our own slice of mem_thresh.
    DECREF(run_ivars->counter);
    run_ivars->counter    = Counter_new();
    run_ivars->mem_thresh = sub_thresh;

    // Done if we already have a SortCache to read from.
    if (run_ivars->sort_cache) {
        return;
    }

    // Open the temp files for reading.  Each alias string is only needed for
    // the Reopen call and is released right after it.
    String *seg_name = Seg_Get_Name(run_ivars->segment);
    String *ord_alias = Str_newf("%o/sort_ord_temp-%i64-to-%i64", seg_name,
                                 run_ivars->ord_start, run_ivars->ord_end);
    InStream *ord_in_dupe
        = InStream_Reopen(ord_in, ord_alias, run_ivars->ord_start,
                          run_ivars->ord_end - run_ivars->ord_start);
    DECREF(ord_alias);

    // Stays NULL for fixed-width runs; DECREF below tolerates that.
    InStream *ix_in_dupe = NULL;
    if (run_ivars->var_width) {
        String *ix_alias = Str_newf("%o/sort_ix_temp-%i64-to-%i64", seg_name,
                                    run_ivars->ix_start, run_ivars->ix_end);
        ix_in_dupe = InStream_Reopen(ix_in, ix_alias, run_ivars->ix_start,
                                     run_ivars->ix_end - run_ivars->ix_start);
        DECREF(ix_alias);
    }

    String *dat_alias = Str_newf("%o/sort_dat_temp-%i64-to-%i64", seg_name,
                                 run_ivars->dat_start, run_ivars->dat_end);
    InStream *dat_in_dupe
        = InStream_Reopen(dat_in, dat_alias, run_ivars->dat_start,
                          run_ivars->dat_end - run_ivars->dat_start);
    DECREF(dat_alias);

    // Get a SortCache.
    String *field = Seg_Field_Name(run_ivars->segment, run_ivars->field_num);
    switch (run_ivars->prim_id & FType_PRIMITIVE_ID_MASK) {
        case FType_TEXT:
            run_ivars->sort_cache = (SortCache*)TextSortCache_new(
                                        field, run_ivars->type,
                                        run_ivars->run_cardinality,
                                        run_ivars->run_max,
                                        run_ivars->null_ord,
                                        run_ivars->ord_width, ord_in_dupe,
                                        ix_in_dupe, dat_in_dupe);
            break;
        case FType_INT32:
            run_ivars->sort_cache = (SortCache*)I32SortCache_new(
                                        field, run_ivars->type,
                                        run_ivars->run_cardinality,
                                        run_ivars->run_max,
                                        run_ivars->null_ord,
                                        run_ivars->ord_width, ord_in_dupe,
                                        dat_in_dupe);
            break;
        case FType_INT64:
            run_ivars->sort_cache = (SortCache*)I64SortCache_new(
                                        field, run_ivars->type,
                                        run_ivars->run_cardinality,
                                        run_ivars->run_max,
                                        run_ivars->null_ord,
                                        run_ivars->ord_width, ord_in_dupe,
                                        dat_in_dupe);
            break;
        case FType_FLOAT32:
            run_ivars->sort_cache = (SortCache*)F32SortCache_new(
                                        field, run_ivars->type,
                                        run_ivars->run_cardinality,
                                        run_ivars->run_max,
                                        run_ivars->null_ord,
                                        run_ivars->ord_width, ord_in_dupe,
                                        dat_in_dupe);
            break;
        case FType_FLOAT64:
            run_ivars->sort_cache = (SortCache*)F64SortCache_new(
                                        field, run_ivars->type,
                                        run_ivars->run_cardinality,
                                        run_ivars->run_max,
                                        run_ivars->null_ord,
                                        run_ivars->ord_width, ord_in_dupe,
                                        dat_in_dupe);
            break;
        default:
            // No cache will take the reopened streams, so release them
            // before throwing rather than leaking them.
            DECREF(ord_in_dupe);
            DECREF(ix_in_dupe);
            DECREF(dat_in_dupe);
            THROW(ERR, "No SortCache class for %o", run_ivars->type);
    }

    // Drop our own references to the reopened streams.
    DECREF(ord_in_dupe);
    DECREF(ix_in_dupe);
    DECREF(dat_in_dupe);
}