// Begin a new term: reset the writer's last-seen doc id to 0 and store the
// posting stream's current position (OutStream_Tell) in the TermInfo's
// post_filepos.
//
// self   The MatchPostingWriter whose outstream position is sampled.
// tinfo  The TermInfo that receives the position.
void
MatchPostWriter_Start_Term_IMP(MatchPostingWriter *self, TermInfo *tinfo) {
    MatchPostingWriterIVARS *const writer_ivars = MatchPostWriter_IVARS(self);
    TermInfoIVARS *const term_ivars = TInfo_IVARS(tinfo);

    // Doc id tracking restarts with every term.
    writer_ivars->last_doc_id = 0;

    // Record where this term's data begins in the posting stream.
    term_ivars->post_filepos = OutStream_Tell(writer_ivars->outstream);
}
// Write `value` (certified as a TERMINFO) to `outstream` as a delta against
// the TermInfo currently held by the stepper.
//
// The record consists of, in order:
//   - doc_freq, via OutStream_Write_C32;
//   - the difference between the new and the held post_filepos, via
//     OutStream_Write_C64;
//   - skip_filepos via OutStream_Write_C64, only when doc_freq is at least
//     the stepper's skip_interval.
//
// Afterwards the held TermInfo is updated to mirror `value` via TInfo_Mimic.
void
MatchTInfoStepper_Write_Delta_IMP(MatchTermInfoStepper *self,
                                  OutStream *outstream, Obj *value) {
    MatchTermInfoStepperIVARS *const stepper_ivars
        = MatchTInfoStepper_IVARS(self);
    TermInfo *const incoming = (TermInfo*)CERTIFY(value, TERMINFO);
    TermInfo *const held     = (TermInfo*)stepper_ivars->value;
    const int32_t doc_freq   = TInfo_Get_Doc_Freq(incoming);
    TermInfoIVARS *const incoming_ivars = TInfo_IVARS(incoming);
    const int64_t post_delta
        = incoming_ivars->post_filepos - TInfo_IVARS(held)->post_filepos;

    // Document frequency first.
    OutStream_Write_C32(outstream, doc_freq);

    // Postings file pointer, relative to the previously written entry.
    OutStream_Write_C64(outstream, post_delta);

    // The skip file pointer is only present for sufficiently frequent terms.
    if (doc_freq >= stepper_ivars->skip_interval) {
        OutStream_Write_C64(outstream, incoming_ivars->skip_filepos);
    }

    // Remember this entry as the base for the next delta.
    TInfo_Mimic(held, (Obj*)incoming);
}
// Read one delta record from `instream` and apply it to the TermInfo held
// by the stepper.
//
// In order: doc_freq is replaced by a value read with InStream_Read_C32;
// post_filepos is advanced by a value read with InStream_Read_C64; and
// skip_filepos is read with InStream_Read_C64 when doc_freq is at least the
// stepper's skip_interval, otherwise it is set to 0 without reading.
void
MatchTInfoStepper_Read_Delta_IMP(MatchTermInfoStepper *self,
                                 InStream *instream) {
    MatchTermInfoStepperIVARS *const stepper_ivars
        = MatchTInfoStepper_IVARS(self);
    TermInfoIVARS *const held_ivars
        = TInfo_IVARS((TermInfo*)stepper_ivars->value);

    // Document frequency is stored outright.
    held_ivars->doc_freq = InStream_Read_C32(instream);

    // The postings file pointer is stored as an increment.
    held_ivars->post_filepos += InStream_Read_C64(instream);

    // A skip pointer follows only when doc_freq reaches skip_interval.
    held_ivars->skip_filepos
        = (held_ivars->doc_freq >= stepper_ivars->skip_interval)
          ? InStream_Read_C64(instream)
          : 0;
}
// Write `value` (certified as a TERMINFO) to `outstream` as a key frame,
// i.e. with the postings file pointer written as-is rather than as a
// difference from the stepper's held TermInfo.
//
// The record consists of, in order:
//   - doc_freq, via OutStream_Write_C32;
//   - post_filepos, via OutStream_Write_C64;
//   - skip_filepos via OutStream_Write_C64, only when doc_freq is at least
//     the stepper's skip_interval.
//
// Afterwards the held TermInfo is updated to mirror `value` via TInfo_Mimic.
void
MatchTInfoStepper_Write_Key_Frame_IMP(MatchTermInfoStepper *self,
                                      OutStream *outstream, Obj *value) {
    MatchTermInfoStepperIVARS *const stepper_ivars
        = MatchTInfoStepper_IVARS(self);
    TermInfo *incoming = (TermInfo*)CERTIFY(value, TERMINFO);
    int32_t doc_freq = TInfo_Get_Doc_Freq(incoming);
    TermInfoIVARS *const incoming_ivars = TInfo_IVARS((TermInfo*)value);

    // Document frequency first.
    OutStream_Write_C32(outstream, doc_freq);

    // Full postings file pointer.
    OutStream_Write_C64(outstream, incoming_ivars->post_filepos);

    // The skip file pointer is only present for sufficiently frequent terms.
    if (doc_freq >= stepper_ivars->skip_interval) {
        OutStream_Write_C64(outstream, incoming_ivars->skip_filepos);
    }

    // Remember this entry as the base for subsequent deltas.
    TInfo_Mimic((TermInfo*)stepper_ivars->value, (Obj*)incoming);
}
// Store the posting stream's current position (OutStream_Tell) in the
// post_filepos field of `tinfo`.  Nothing else in either object is touched.
//
// self   The MatchPostingWriter whose outstream position is sampled.
// tinfo  The TermInfo that receives the position.
void
MatchPostWriter_Update_Skip_Info_IMP(MatchPostingWriter *self,
                                     TermInfo *tinfo) {
    TInfo_IVARS(tinfo)->post_filepos
        = OutStream_Tell(MatchPostWriter_IVARS(self)->outstream);
}
// Drain the pool's postings in fetch order.  Every posting is written
// through `post_writer`; each time the term text changes, the finished term
// and its TermInfo are handed to the pool's LexiconWriter; and, when
// `skip_stream` is non-NULL, a skip record is written each time a term's
// doc_freq reaches a multiple of the architecture's skip interval.
//
// self         PostingPool supplying RawPostings via PostPool_Fetch().  The
//              first fetch is passed through CERTIFY(..., RAWPOSTING), so
//              the pool is expected to yield at least one posting
//              (presumably CERTIFY rejects NULL -- confirm).
// post_writer  Receives the Start_Term / Write_Posting / Update_Skip_Info
//              calls.
// skip_stream  Destination for skip records; NULL disables skip data.
static void
S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
                           OutStream *skip_stream) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    TermInfo      *const tinfo            = TInfo_new(0);
    TermInfo      *const skip_tinfo       = TInfo_new(0);
    TermInfoIVARS *const tinfo_ivars      = TInfo_IVARS(tinfo);
    TermInfoIVARS *const skip_tinfo_ivars = TInfo_IVARS(skip_tinfo);
    LexiconWriter *const lex_writer       = ivars->lex_writer;
    SkipStepper   *const skip_stepper     = ivars->skip_stepper;
    SkipStepperIVARS *const skip_stepper_ivars
        = SkipStepper_IVARS(skip_stepper);
    int32_t        last_skip_doc          = 0;
    int64_t        last_skip_filepos      = 0;
    const int32_t  skip_interval
        = Arch_Skip_Interval(Schema_Get_Architecture(ivars->schema));

    // Prime heldover variables.
    RawPosting *posting
        = (RawPosting*)CERTIFY(PostPool_Fetch(self), RAWPOSTING);
    RawPostingIVARS *post_ivars = RawPost_IVARS(posting);
    CharBuf *last_term_text
        = CB_new_from_trusted_utf8(post_ivars->blob, post_ivars->content_len);
    const char *last_text_buf  = CB_Get_Ptr8(last_term_text);
    uint32_t    last_text_size = CB_Get_Size(last_term_text);
    SkipStepper_Set_ID_And_Filepos(skip_stepper, 0, 0);

    // Initialize sentinel to be used on the last iter, using an empty string
    // in order to make LexiconWriter Do The Right Thing.  Its memory comes
    // from alloca(), so it lives in this function's stack frame and is not
    // DECREF'd during cleanup.
    size_t sentinel_size = Class_Get_Obj_Alloc_Size(RAWPOSTING)
                           + 20;  // blob length + cushion
    char empty_string[] = "";
    RawPosting *sentinel = RawPost_new(alloca(sentinel_size), 0, 1,
                                       empty_string, 0);

    while (1) {
        bool same_text_as_last = true;

        if (posting == NULL) {
            // On the last iter, use an empty string to make LexiconWriter
            // DTRT.
            posting = sentinel;
            post_ivars = RawPost_IVARS(posting);
            same_text_as_last = false;
        }
        else {
            // Compare once.
            if (post_ivars->content_len != last_text_size
                || memcmp(&post_ivars->blob, last_text_buf, last_text_size) != 0
               ) {
                same_text_as_last = false;
            }
        }

        // If the term text changes, process the last term.
        if (!same_text_as_last) {
            // Hand off to LexiconWriter.
            LexWriter_Add_Term(lex_writer, (Obj*)last_term_text, tinfo);

            // Start each term afresh.
            TInfo_Reset(tinfo);
            PostWriter_Start_Term(post_writer, tinfo);

            // Init skip data in preparation for the next term.
            skip_stepper_ivars->doc_id  = 0;
            skip_stepper_ivars->filepos = tinfo_ivars->post_filepos;
            last_skip_doc     = 0;
            last_skip_filepos = tinfo_ivars->post_filepos;

            // Remember the term_text so we can write string diffs.
            CB_Mimic_Utf8(last_term_text, post_ivars->blob,
                          post_ivars->content_len);
            last_text_buf  = CB_Get_Ptr8(last_term_text);
            last_text_size = CB_Get_Size(last_term_text);
        }

        // Bail on last iter before writing invalid posting data.
        if (posting == sentinel) { break; }

        // Write posting data.
        PostWriter_Write_Posting(post_writer, posting);

        // Doc freq lags by one iter.
        tinfo_ivars->doc_freq++;

        // Write skip data.  Requiring same_text_as_last means the first
        // posting of a term never triggers a skip record.
        if (skip_stream != NULL
            && same_text_as_last
            && tinfo_ivars->doc_freq % skip_interval == 0
            && tinfo_ivars->doc_freq != 0
           ) {
            // If first skip group, save skip stream pos for term info.
            if (tinfo_ivars->doc_freq == skip_interval) {
                tinfo_ivars->skip_filepos = OutStream_Tell(skip_stream);
            }

            // Write deltas: the stepper's previous doc id / filepos are
            // saved, the stepper is advanced to the current posting, and
            // the saved values are passed along to Write_Record.
            last_skip_doc     = skip_stepper_ivars->doc_id;
            last_skip_filepos = skip_stepper_ivars->filepos;
            skip_stepper_ivars->doc_id = post_ivars->doc_id;
            PostWriter_Update_Skip_Info(post_writer, skip_tinfo);
            skip_stepper_ivars->filepos = skip_tinfo_ivars->post_filepos;
            SkipStepper_Write_Record(skip_stepper, skip_stream,
                                     last_skip_doc, last_skip_filepos);
        }

        // Retrieve the next posting from the sort pool.  Do NOT DECREF the
        // posting that was just written (presumably the pool still owns
        // it -- confirm before changing).
        posting = (RawPosting*)PostPool_Fetch(self);

        // A NULL fetch marks the end of the stream; the top of the loop
        // then repoints post_ivars at the sentinel.  Only look up ivars for
        // a real posting so the accessor is never handed a NULL object.
        if (posting != NULL) {
            post_ivars = RawPost_IVARS(posting);
        }
    }

    // Clean up.
    DECREF(last_term_text);
    DECREF(skip_tinfo);
    DECREF(tinfo);
}