LexiconWriter*
LexWriter_init(LexiconWriter *self, Snapshot *snapshot, Segment *segment,
               PolyReader *polyreader) {
    Schema       *schema = PolyReader_Get_Schema(polyreader);
    Architecture *arch   = Schema_Get_Architecture(schema);

    DataWriter_init((DataWriter*)self, snapshot, segment, polyreader);

    /* Assign. */
    self->index_interval = Arch_Index_Interval(arch);
    self->skip_interval  = Arch_Skip_Interval(arch);

    /* Init. */
    self->ix_out     = NULL;
    self->ixix_out   = NULL;
    self->dat_out    = NULL;
    self->count      = 0;
    self->ix_count   = 0;
    self->last_tinfo = TInfo_new(0,0,0,0);
    self->last_text  = CB_new(40);
    self->dat_file   = CB_new(30);
    self->ix_file    = CB_new(30);
    self->ixix_file  = CB_new(30);
    self->counts     = Hash_new(0);
    self->ix_counts  = Hash_new(0);
    self->stepper    = NULL;
    self->temp_mode  = false;

    /* Derive. */
    self->stepper = LexStepper_new((CharBuf*)&EMPTY, self->skip_interval);

    return self;
}
static void
S_try_init_components(void *context) {
    SegReader *self = (SegReader*)context;
    Schema *schema = SegReader_Get_Schema(self);
    Architecture *arch = Schema_Get_Architecture(schema);
    Arch_Init_Seg_Reader(arch, self);
}
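/* S_try_init_components exists so the fallible Architecture hook can run
 * under an error trap: it matches the void-(*)(void*) signature that
 * Clownfish's Err_trap() expects.  A minimal sketch of a call site, assuming
 * Err_trap(routine, context) -- the wrapper name below is hypothetical, not
 * taken from this excerpt:
 */
static Err*
S_example_guarded_init(SegReader *self) {
    /* Err_trap returns NULL on success, or the caught Err on failure. */
    return Err_trap(S_try_init_components, self);
}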
LexiconWriter*
LexWriter_init(LexiconWriter *self, Schema *schema, Snapshot *snapshot,
               Segment *segment, PolyReader *polyreader) {
    Architecture *arch = Schema_Get_Architecture(schema);

    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);

    // Assign.
    self->index_interval = Arch_Index_Interval(arch);
    self->skip_interval  = Arch_Skip_Interval(arch);

    // Init.
    self->ix_out    = NULL;
    self->ixix_out  = NULL;
    self->dat_out   = NULL;
    self->count     = 0;
    self->ix_count  = 0;
    self->dat_file  = CB_new(30);
    self->ix_file   = CB_new(30);
    self->ixix_file = CB_new(30);
    self->counts    = Hash_new(0);
    self->ix_counts = Hash_new(0);
    self->temp_mode = false;
    self->term_stepper  = NULL;
    self->tinfo_stepper = (TermStepper*)MatchTInfoStepper_new(schema);

    return self;
}
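/* Editorial note on the two LexWriter_init variants above: the later one
 * takes the Schema explicitly rather than deriving it from the PolyReader,
 * drops the last_text/last_tinfo held-over state, and replaces the single
 * LexStepper with a term_stepper plus a tinfo_stepper (MatchTInfoStepper),
 * so term text and TermInfo records are stepped independently. */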
MatchTermInfoStepper*
MatchTInfoStepper_init(MatchTermInfoStepper *self, Schema *schema) {
    Architecture *arch = Schema_Get_Architecture(schema);
    TermStepper_init((TermStepper*)self);
    self->skip_interval = Arch_Skip_Interval(arch);
    self->value = (Obj*)TInfo_new(0);
    return self;
}
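/* Presumably (hedged -- the decoding loop is not shown in this excerpt) the
 * stepper reuses this one TermInfo, overwriting its fields in place as each
 * lexicon entry is decoded, and caches skip_interval because skip data is
 * only present for terms whose doc_freq reaches that threshold. */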
SegWriter*
SegWriter_init(SegWriter *self, Schema *schema, Snapshot *snapshot,
               Segment *segment, PolyReader *polyreader) {
    Architecture *arch = Schema_Get_Architecture(schema);
    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);
    SegWriterIVARS *const ivars = SegWriter_IVARS(self);
    ivars->by_api   = Hash_new(0);
    ivars->inverter = Inverter_new(schema, segment);
    ivars->writers  = Vec_new(16);
    Arch_Init_Seg_Writer(arch, self);
    return self;
}
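/* Arch_Init_Seg_Writer hands component setup to the Architecture, which is
 * the extension point for adding or swapping index components.  A minimal
 * sketch of what a default implementation might register, assuming the
 * stock Register_* methods on Architecture -- the exact set and order
 * belong to the subclass:
 */
static void
S_example_init_seg_writer(Architecture *self, SegWriter *writer) {
    /* Assumption: the posting-list writer looks up the lexicon writer,
     * so the lexicon writer must be registered first. */
    Arch_Register_Lexicon_Writer(self, writer);
    Arch_Register_Posting_List_Writer(self, writer);
    Arch_Register_Doc_Writer(self, writer);
}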
LexIndex*
LexIndex_init(LexIndex *self, Schema *schema, Folder *folder,
              Segment *segment, String *field) {
    int32_t field_num = Seg_Field_Num(segment, field);
    String *seg_name  = Seg_Get_Name(segment);
    String *ixix_file = Str_newf("%o/lexicon-%i32.ixix", seg_name, field_num);
    String *ix_file   = Str_newf("%o/lexicon-%i32.ix", seg_name, field_num);
    Architecture *arch = Schema_Get_Architecture(schema);

    // Init.
    Lex_init((Lexicon*)self, field);
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    ivars->tinfo = TInfo_new(0);
    ivars->tick  = 0;

    // Derive
    ivars->field_type = Schema_Fetch_Type(schema, field);
    if (!ivars->field_type) {
        String *mess = MAKE_MESS("Unknown field: '%o'", field);
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        Err_throw_mess(ERR, mess);
    }
    ivars->field_type = (FieldType*)INCREF(ivars->field_type);
    ivars->term_stepper = FType_Make_Term_Stepper(ivars->field_type);
    ivars->ixix_in = Folder_Open_In(folder, ixix_file);
    if (!ivars->ixix_in) {
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->ix_in = Folder_Open_In(folder, ix_file);
    if (!ivars->ix_in) {
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->index_interval = Arch_Index_Interval(arch);
    ivars->skip_interval  = Arch_Skip_Interval(arch);
    ivars->size = (int32_t)(InStream_Length(ivars->ixix_in)
                            / sizeof(int64_t));
    ivars->offsets = (const int64_t*)InStream_Buf(
        ivars->ixix_in, (size_t)InStream_Length(ivars->ixix_in));

    DECREF(ixix_file);
    DECREF(ix_file);

    return self;
}
LexIndex*
LexIndex_init(LexIndex *self, Schema *schema, Folder *folder,
              Segment *segment, const CharBuf *field) {
    i32_t field_num = Seg_Field_Num(segment, field);
    CharBuf *seg_name  = Seg_Get_Name(segment);
    CharBuf *ixix_file = CB_newf("%o/lexicon-%i32.ixix", seg_name, field_num);
    CharBuf *ix_file   = CB_newf("%o/lexicon-%i32.ix", seg_name, field_num);
    Architecture *arch = Schema_Get_Architecture(schema);

    /* Init. */
    self->term  = ViewCB_new_from_trusted_utf8(NULL, 0);
    self->tinfo = TInfo_new(0,0,0,0);
    self->tick  = 0;

    /* Derive */
    self->field_type = Schema_Fetch_Type(schema, field);
    if (!self->field_type) {
        CharBuf *mess = MAKE_MESS("Unknown field: '%o'", field);
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        Err_throw_mess(mess);
    }
    INCREF(self->field_type);
    self->ixix_in = Folder_Open_In(folder, ixix_file);
    self->ix_in   = Folder_Open_In(folder, ix_file);
    if (!self->ixix_in || !self->ix_in) {
        CharBuf *mess = MAKE_MESS("Can't open either %o or %o", ix_file,
                                  ixix_file);
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        Err_throw_mess(mess);
    }
    self->index_interval = Arch_Index_Interval(arch);
    self->skip_interval  = Arch_Skip_Interval(arch);
    self->size    = (i32_t)(InStream_Length(self->ixix_in) / sizeof(i64_t));
    self->offsets = (i64_t*)InStream_Buf(self->ixix_in,
                                         (size_t)InStream_Length(self->ixix_in));
    self->data  = InStream_Buf(self->ix_in, InStream_Length(self->ix_in));
    self->limit = self->data + InStream_Length(self->ix_in);

    DECREF(ixix_file);
    DECREF(ix_file);

    return self;
}
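/* The file layout both LexIndex_init variants assume: the .ixix file is a
 * flat array of i64 byte offsets (hence size = file_length / 8), where entry
 * n points at the start of the n-th indexed term's record in the .ix file,
 * and the index covers only every index_interval-th lexicon entry.  A
 * hypothetical accessor, for illustration only:
 */
static i64_t
S_example_ix_filepos(LexIndex *self, i32_t tick) {
    /* offsets[] is the .ixix file mapped straight from disk; entry `tick`
     * is the byte position of that index record inside the .ix file. */
    return self->offsets[tick];
}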
PostingPool*
PostPool_init(PostingPool *self, Schema *schema, const CharBuf *field,
              MemoryPool *mem_pool) {
    Architecture *arch = Schema_Get_Architecture(schema);

    /* Init. */
    SortExRun_init((SortExRun*)self);
    self->lex_instream  = NULL;
    self->post_instream = NULL;
    self->lex_start     = I64_MAX;
    self->post_start    = I64_MAX;
    self->lex_end       = 0;
    self->post_end      = 0;
    self->flipped       = false;
    self->from_seg      = false;
    self->mem_thresh    = 0;
    self->doc_base      = 0;
    self->last_doc_id   = 0;
    self->doc_map       = NULL;
    self->post_count    = 0;
    self->scratch       = NULL;
    self->scratch_cap   = 0;
    self->lex_stepper   = LexStepper_new(field, Arch_Skip_Interval(arch));

    /* Assign. */
    self->schema   = (Schema*)INCREF(schema);
    self->mem_pool = (MemoryPool*)INCREF(mem_pool);
    self->field    = CB_Clone(field);

    /* Derive. */
    self->posting = Schema_Fetch_Posting(schema, field);
    self->posting = (Posting*)Post_Clone(self->posting);
    self->type    = (FieldType*)INCREF(Schema_Fetch_Type(schema, field));
    self->compare = PostPoolQ_compare_rawp;

    return self;
}
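/* Note on the initializer above: PostingPool inits SortExRun first because
 * it doubles as a run in an external sort.  The I64_MAX values for
 * lex_start/post_start read as "no file range assigned yet" sentinels, and
 * `flipped` tracks whether the pool has switched from accumulating postings
 * to feeding them back out in sorted order -- an inference from the fields
 * here, not a statement from the surrounding source. */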
static void
S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
                           OutStream *skip_stream) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    TermInfo *const tinfo      = TInfo_new(0);
    TermInfo *const skip_tinfo = TInfo_new(0);
    TermInfoIVARS *const tinfo_ivars      = TInfo_IVARS(tinfo);
    TermInfoIVARS *const skip_tinfo_ivars = TInfo_IVARS(skip_tinfo);
    LexiconWriter *const lex_writer   = ivars->lex_writer;
    SkipStepper   *const skip_stepper = ivars->skip_stepper;
    SkipStepperIVARS *const skip_stepper_ivars
        = SkipStepper_IVARS(skip_stepper);
    int32_t last_skip_doc     = 0;
    int64_t last_skip_filepos = 0;
    const int32_t skip_interval
        = Arch_Skip_Interval(Schema_Get_Architecture(ivars->schema));

    // Prime heldover variables.
    RawPosting *posting
        = (RawPosting*)CERTIFY(PostPool_Fetch(self), RAWPOSTING);
    RawPostingIVARS *post_ivars = RawPost_IVARS(posting);
    CharBuf *last_term_text
        = CB_new_from_trusted_utf8(post_ivars->blob, post_ivars->content_len);
    const char *last_text_buf  = CB_Get_Ptr8(last_term_text);
    uint32_t    last_text_size = CB_Get_Size(last_term_text);
    SkipStepper_Set_ID_And_Filepos(skip_stepper, 0, 0);

    // Initialize sentinel to be used on the last iter, using an empty string
    // in order to make LexiconWriter Do The Right Thing.
    size_t sentinel_size = Class_Get_Obj_Alloc_Size(RAWPOSTING)
                           + 20;  // blob length + cushion
    char empty_string[] = "";
    RawPosting *sentinel
        = RawPost_new(alloca(sentinel_size), 0, 1, empty_string, 0);

    while (1) {
        bool same_text_as_last = true;

        if (posting == NULL) {
            // On the last iter, use an empty string to make LexiconWriter
            // DTRT.
            posting = sentinel;
            post_ivars = RawPost_IVARS(posting);
            same_text_as_last = false;
        }
        else {
            // Compare once.
            if (post_ivars->content_len != last_text_size
                || memcmp(&post_ivars->blob, last_text_buf,
                          last_text_size) != 0
               ) {
                same_text_as_last = false;
            }
        }

        // If the term text changes, process the last term.
        if (!same_text_as_last) {
            // Hand off to LexiconWriter.
            LexWriter_Add_Term(lex_writer, (Obj*)last_term_text, tinfo);

            // Start each term afresh.
            TInfo_Reset(tinfo);
            PostWriter_Start_Term(post_writer, tinfo);

            // Init skip data in preparation for the next term.
            skip_stepper_ivars->doc_id  = 0;
            skip_stepper_ivars->filepos = tinfo_ivars->post_filepos;
            last_skip_doc     = 0;
            last_skip_filepos = tinfo_ivars->post_filepos;

            // Remember the term_text so we can write string diffs.
            CB_Mimic_Utf8(last_term_text, post_ivars->blob,
                          post_ivars->content_len);
            last_text_buf  = CB_Get_Ptr8(last_term_text);
            last_text_size = CB_Get_Size(last_term_text);
        }

        // Bail on last iter before writing invalid posting data.
        if (posting == sentinel) { break; }

        // Write posting data.
        PostWriter_Write_Posting(post_writer, posting);

        // Doc freq lags by one iter.
        tinfo_ivars->doc_freq++;

        // Write skip data.
        if (skip_stream != NULL
            && same_text_as_last
            && tinfo_ivars->doc_freq % skip_interval == 0
            && tinfo_ivars->doc_freq != 0
           ) {
            // If first skip group, save skip stream pos for term info.
            if (tinfo_ivars->doc_freq == skip_interval) {
                tinfo_ivars->skip_filepos = OutStream_Tell(skip_stream);
            }
            // Write deltas.
            last_skip_doc     = skip_stepper_ivars->doc_id;
            last_skip_filepos = skip_stepper_ivars->filepos;
            skip_stepper_ivars->doc_id = post_ivars->doc_id;
            PostWriter_Update_Skip_Info(post_writer, skip_tinfo);
            skip_stepper_ivars->filepos = skip_tinfo_ivars->post_filepos;
            SkipStepper_Write_Record(skip_stepper, skip_stream,
                                     last_skip_doc, last_skip_filepos);
        }

        // Retrieve the next posting from the sort pool.
        // DECREF(posting);  // No!!  DON'T destroy!!!
        posting = (RawPosting*)PostPool_Fetch(self);
        post_ivars = RawPost_IVARS(posting);
    }

    // Clean up.
    DECREF(last_term_text);
    DECREF(skip_tinfo);
    DECREF(tinfo);
}
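/* Worked example of the skip logic above, with illustrative numbers: if
 * skip_interval is 16 and a term's postings span 100 documents, skip records
 * are emitted as doc_freq passes 16, 32, 48, 64, 80, and 96, and
 * tinfo->skip_filepos is captured at the first one (doc_freq == 16) so
 * readers know where that term's skip stream begins. */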