/*
 * Prepare the writer for a new field.
 *
 * Builds the three per-field lexicon filenames (".dat", ".ix", ".ixix") from
 * the segment name and `field_num`, adds each of them to the snapshot, opens
 * an output stream for each through the writer's folder, and finally clears
 * the per-field running state.
 *
 * Throws if any of the three streams comes back NULL from Folder_Open_Out.
 * All three opens are attempted before any of the results is inspected.
 */
void LexWriter_start_field(LexiconWriter *self, i32_t field_num) {
    CharBuf  *const segment_name = Seg_Get_Name(self->segment);
    Folder   *const out_folder   = self->folder;
    Snapshot *const snap         = LexWriter_Get_Snapshot(self);

    /* Derive the filenames for this field. */
    CB_setf(self->dat_file, "%o/lexicon-%i32.dat", segment_name, field_num);
    CB_setf(self->ix_file, "%o/lexicon-%i32.ix", segment_name, field_num);
    CB_setf(self->ixix_file, "%o/lexicon-%i32.ixix", segment_name,
            field_num);

    /* Record the new files in the snapshot. */
    Snapshot_Add_Entry(snap, self->dat_file);
    Snapshot_Add_Entry(snap, self->ix_file);
    Snapshot_Add_Entry(snap, self->ixix_file);

    /* Open all three streams, then verify each one in turn. */
    self->dat_out  = Folder_Open_Out(out_folder, self->dat_file);
    self->ix_out   = Folder_Open_Out(out_folder, self->ix_file);
    self->ixix_out = Folder_Open_Out(out_folder, self->ixix_file);
    if (self->dat_out == NULL) {
        THROW("Can't open %o", self->dat_file);
    }
    if (self->ix_out == NULL) {
        THROW("Can't open %o", self->ix_file);
    }
    if (self->ixix_out == NULL) {
        THROW("Can't open %o", self->ixix_file);
    }

    /* Start the field with zeroed counters, a reset TermInfo and empty
     * "last term" text. */
    self->count    = 0;
    self->ix_count = 0;
    TInfo_Reset(self->last_tinfo);
    CB_Set_Size(self->last_text, 0);
}
/*
 * Put the writer into temp mode, sending term data to `temp_outstream`.
 *
 * Throws if the writer already has a `dat_out` stream assigned.  The supplied
 * stream is passed through INCREF before being stored in `dat_out`.  The
 * running counters, last TermInfo and last term text are cleared, and
 * `temp_mode` is switched on.
 */
void LexWriter_enter_temp_mode(LexiconWriter *self,
                               OutStream *temp_outstream) {
    /* A data stream is already attached: refuse to replace it. */
    if (self->dat_out) {
        THROW("Can't enter temp mode (filename: %o) ", self->dat_file);
    }

    /* Adopt the caller's stream as the data stream. */
    self->dat_out = (OutStream*)INCREF(temp_outstream);

    /* Clear per-run state: counters, TermInfo and last term text. */
    self->count    = 0;
    self->ix_count = 0;
    TInfo_Reset(self->last_tinfo);
    CB_Set_Size(self->last_text, 0);

    /* Flag the mode so it can be checked later. */
    self->temp_mode = true;
}
/*
 * Reset the stepper by resetting the object stored in its `value` ivar,
 * which is treated as a TermInfo.
 */
void MatchTInfoStepper_Reset_IMP(MatchTermInfoStepper *self) {
    MatchTermInfoStepperIVARS *const state = MatchTInfoStepper_IVARS(self);
    TermInfo *const held_tinfo = (TermInfo*)state->value;

    TInfo_Reset(held_tinfo);
}
// Drain the posting pool, writing every posting through `post_writer` and
// handing each completed term (text + TermInfo) to the pool's LexiconWriter.
//
// Postings are fetched one at a time with PostPool_Fetch().  Consecutive
// postings whose blob matches the remembered term text belong to the same
// term; when the text changes, the previous term is flushed to the
// LexiconWriter and a new term is started on `post_writer`.  A stack-allocated
// sentinel posting with empty text is substituted once the pool returns NULL
// so that the final term is flushed by the same code path.
//
// If `skip_stream` is non-NULL, a skip record is written to it every
// `skip_interval` postings within a term.
//
// The first fetch is CERTIFY'd as a RAWPOSTING, so the pool is expected to
// yield at least one posting.
static void S_write_terms_and_postings(PostingPool *self,
                                       PostingWriter *post_writer,
                                       OutStream *skip_stream) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    TermInfo *const tinfo      = TInfo_new(0);
    TermInfo *const skip_tinfo = TInfo_new(0);
    TermInfoIVARS *const tinfo_ivars      = TInfo_IVARS(tinfo);
    TermInfoIVARS *const skip_tinfo_ivars = TInfo_IVARS(skip_tinfo);
    LexiconWriter *const lex_writer   = ivars->lex_writer;
    SkipStepper   *const skip_stepper = ivars->skip_stepper;
    SkipStepperIVARS *const skip_stepper_ivars
        = SkipStepper_IVARS(skip_stepper);
    int32_t last_skip_doc     = 0;
    int64_t last_skip_filepos = 0;
    const int32_t skip_interval
        = Arch_Skip_Interval(Schema_Get_Architecture(ivars->schema));

    // Prime heldover variables.
    RawPosting *posting
        = (RawPosting*)CERTIFY(PostPool_Fetch(self), RAWPOSTING);
    RawPostingIVARS *post_ivars = RawPost_IVARS(posting);
    CharBuf *last_term_text
        = CB_new_from_trusted_utf8(post_ivars->blob, post_ivars->content_len);
    const char *last_text_buf  = CB_Get_Ptr8(last_term_text);
    uint32_t    last_text_size = CB_Get_Size(last_term_text);
    SkipStepper_Set_ID_And_Filepos(skip_stepper, 0, 0);

    // Initialize sentinel to be used on the last iter, using an empty string
    // in order to make LexiconWriter Do The Right Thing.
    size_t sentinel_size = Class_Get_Obj_Alloc_Size(RAWPOSTING)
                           + 20;  // blob length + cushion
    char empty_string[] = "";
    // alloca() must not appear inside a call's argument list: on many
    // systems the reserved space would land in the middle of the outgoing
    // arguments.  Reserve the memory first, then pass the pointer.
    void *sentinel_mem = alloca(sentinel_size);
    RawPosting *sentinel = RawPost_new(sentinel_mem, 0, 1, empty_string, 0);

    while (1) {
        bool same_text_as_last = true;

        if (posting == NULL) {
            // On the last iter, use an empty string to make LexiconWriter
            // DTRT.
            posting    = sentinel;
            post_ivars = RawPost_IVARS(posting);
            same_text_as_last = false;
        }
        else {
            // Compare once.  `blob` is passed the same way as in the other
            // uses in this function rather than via its address.
            if (post_ivars->content_len != last_text_size
                || memcmp(post_ivars->blob, last_text_buf,
                          last_text_size) != 0
               ) {
                same_text_as_last = false;
            }
        }

        // If the term text changes, process the last term.
        if (!same_text_as_last) {
            // Hand off to LexiconWriter.
            LexWriter_Add_Term(lex_writer, (Obj*)last_term_text, tinfo);

            // Start each term afresh.
            TInfo_Reset(tinfo);
            PostWriter_Start_Term(post_writer, tinfo);

            // Init skip data in preparation for the next term.
            skip_stepper_ivars->doc_id  = 0;
            skip_stepper_ivars->filepos = tinfo_ivars->post_filepos;
            last_skip_doc     = 0;
            last_skip_filepos = tinfo_ivars->post_filepos;

            // Remember the term_text so we can write string diffs.
            CB_Mimic_Utf8(last_term_text, post_ivars->blob,
                          post_ivars->content_len);
            last_text_buf  = CB_Get_Ptr8(last_term_text);
            last_text_size = CB_Get_Size(last_term_text);
        }

        // Bail on last iter before writing invalid posting data.
        if (posting == sentinel) {
            break;
        }

        // Write posting data.
        PostWriter_Write_Posting(post_writer, posting);

        // Doc freq lags by one iter.
        tinfo_ivars->doc_freq++;

        // Write skip data.
        if (skip_stream != NULL
            && same_text_as_last
            && tinfo_ivars->doc_freq % skip_interval == 0
            && tinfo_ivars->doc_freq != 0
           ) {
            // If first skip group, save skip stream pos for term info.
            if (tinfo_ivars->doc_freq == skip_interval) {
                tinfo_ivars->skip_filepos = OutStream_Tell(skip_stream);
            }

            // Write deltas.
            last_skip_doc     = skip_stepper_ivars->doc_id;
            last_skip_filepos = skip_stepper_ivars->filepos;
            skip_stepper_ivars->doc_id = post_ivars->doc_id;
            PostWriter_Update_Skip_Info(post_writer, skip_tinfo);
            skip_stepper_ivars->filepos = skip_tinfo_ivars->post_filepos;
            SkipStepper_Write_Record(skip_stepper, skip_stream,
                                     last_skip_doc, last_skip_filepos);
        }

        // Retrieve the next posting from the sort pool.  The previous posting
        // is deliberately NOT DECREF'd here (the original author's warning:
        // "DON'T destroy").
        posting = (RawPosting*)PostPool_Fetch(self);
        // Only derive the ivars pointer from a real posting; when the pool is
        // exhausted the top of the loop re-derives it from the sentinel.
        post_ivars = posting != NULL ? RawPost_IVARS(posting) : NULL;
    }

    // Clean up.
    DECREF(last_term_text);
    DECREF(skip_tinfo);
    DECREF(tinfo);
}
/*
 * Reset the stepper by resetting the object stored in its `value` member,
 * which is treated as a TermInfo.
 */
void MatchTInfoStepper_reset(MatchTermInfoStepper *self) {
    TermInfo *const held_tinfo = (TermInfo*)self->value;

    TInfo_Reset(held_tinfo);
}