Example #1
0
// Prepare the writer for a new term.
//
// Resets the writer's last-seen doc id to 0 and stores the current position
// of the writer's output stream in `tinfo` as the term's postings file
// pointer.
void
MatchPostWriter_Start_Term_IMP(MatchPostingWriter *self, TermInfo *tinfo) {
    MatchPostingWriterIVARS *const writer_ivars = MatchPostWriter_IVARS(self);
    TermInfoIVARS *const term_ivars = TInfo_IVARS(tinfo);

    // Doc id tracking restarts from zero for every term.
    writer_ivars->last_doc_id = 0;

    // The term's postings begin wherever the output stream currently is.
    term_ivars->post_filepos = OutStream_Tell(writer_ivars->outstream);
}
Example #2
0
// Serialize `value` (which must be a TermInfo) to `outstream` as a delta
// against the TermInfo currently held by the stepper.
//
// Record layout, in order:
//   - C32: doc_freq
//   - C64: postings file pointer minus the previous postings file pointer
//   - C64: skip file pointer, only if doc_freq >= the stepper's skip_interval
//
// Afterwards the stepper's held TermInfo is updated to mimic `value`.
void
MatchTInfoStepper_Write_Delta_IMP(MatchTermInfoStepper *self,
                                  OutStream *outstream, Obj *value) {
    MatchTermInfoStepperIVARS *const ivars = MatchTInfoStepper_IVARS(self);
    TermInfo *current  = (TermInfo*)CERTIFY(value, TERMINFO);
    TermInfo *previous = (TermInfo*)ivars->value;
    int32_t   doc_freq = TInfo_Get_Doc_Freq(current);

    // Distance between this term's postings and the previous term's.
    int64_t post_delta = TInfo_IVARS(current)->post_filepos
                         - TInfo_IVARS(previous)->post_filepos;

    // Doc freq first, then the postings file pointer delta.
    OutStream_Write_C32(outstream, doc_freq);
    OutStream_Write_C64(outstream, post_delta);

    // The skip file pointer is only stored once doc_freq reaches the skip
    // interval.
    if (doc_freq >= ivars->skip_interval) {
        OutStream_Write_C64(outstream, TInfo_IVARS(current)->skip_filepos);
    }

    // Remember this TermInfo as the base for the next delta.
    TInfo_Mimic(previous, (Obj*)current);
}
Example #3
0
// Read one delta-encoded record from `instream` and apply it to the TermInfo
// held by the stepper.
//
// Reads, in order: a C32 doc_freq (stored as-is), a C64 postings file
// pointer delta (added to the held value), and -- only when doc_freq is at
// least the stepper's skip_interval -- a C64 skip file pointer.  When no skip
// pointer is read, skip_filepos is set to 0.
void
MatchTInfoStepper_Read_Delta_IMP(MatchTermInfoStepper *self, InStream *instream) {
    MatchTermInfoStepperIVARS *const ivars = MatchTInfoStepper_IVARS(self);
    TermInfoIVARS *const held = TInfo_IVARS((TermInfo*)ivars->value);

    // Doc freq is stored absolutely.
    held->doc_freq = InStream_Read_C32(instream);

    // The postings file pointer is stored as a delta.
    held->post_filepos += InStream_Read_C64(instream);

    // Below the skip interval there is no skip pointer in the stream.
    if (held->doc_freq < ivars->skip_interval) {
        held->skip_filepos = 0;
        return;
    }

    held->skip_filepos = InStream_Read_C64(instream);
}
Example #4
0
// Serialize `value` (which must be a TermInfo) to `outstream` as a key frame,
// i.e. with an absolute postings file pointer rather than a delta.
//
// Record layout, in order:
//   - C32: doc_freq
//   - C64: postings file pointer
//   - C64: skip file pointer, only if doc_freq >= the stepper's skip_interval
//
// Afterwards the stepper's held TermInfo is updated to mimic `value`.
void
MatchTInfoStepper_Write_Key_Frame_IMP(MatchTermInfoStepper *self,
                                      OutStream *outstream, Obj *value) {
    MatchTermInfoStepperIVARS *const ivars = MatchTInfoStepper_IVARS(self);
    TermInfo *term_info = (TermInfo*)CERTIFY(value, TERMINFO);
    int32_t   doc_freq  = TInfo_Get_Doc_Freq(term_info);
    TermInfoIVARS *const term_ivars = TInfo_IVARS((TermInfo*)value);

    // Doc freq first, then the absolute postings file pointer.
    OutStream_Write_C32(outstream, doc_freq);
    OutStream_Write_C64(outstream, term_ivars->post_filepos);

    // The skip file pointer is only stored once doc_freq reaches the skip
    // interval.
    if (doc_freq >= ivars->skip_interval) {
        OutStream_Write_C64(outstream, term_ivars->skip_filepos);
    }

    // Remember this TermInfo as the stepper's current value.
    TInfo_Mimic((TermInfo*)ivars->value, (Obj*)term_info);
}
Example #5
0
// Record the current position of the writer's output stream in `tinfo` as
// its postings file pointer.  No other field of `tinfo` is touched.
void
MatchPostWriter_Update_Skip_Info_IMP(MatchPostingWriter *self, TermInfo *tinfo) {
    MatchPostingWriterIVARS *const writer_ivars = MatchPostWriter_IVARS(self);
    TermInfoIVARS *const term_ivars = TInfo_IVARS(tinfo);

    term_ivars->post_filepos = OutStream_Tell(writer_ivars->outstream);
}
Example #6
0
// Drain every posting from `self` (via PostPool_Fetch) and write it out.
//
// Consecutive postings carrying the same term text are treated as one term:
//   - each posting is written through `post_writer`;
//   - whenever the term text changes, the finished term and its TermInfo are
//     handed to the pool's LexiconWriter and per-term state is reset;
//   - if `skip_stream` is non-NULL, a skip record is written to it every
//     `skip_interval` postings within a term.
//
// The first fetched posting must be a RAWPOSTING (CERTIFY enforces this).
// A NULL return from PostPool_Fetch marks the end of input; a stack-allocated
// sentinel posting with empty text is then substituted so that the final real
// term is flushed through the same "term text changed" path.
static void
S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
                           OutStream *skip_stream) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    TermInfo      *const tinfo            = TInfo_new(0);
    TermInfo      *const skip_tinfo       = TInfo_new(0);
    TermInfoIVARS *const tinfo_ivars      = TInfo_IVARS(tinfo);
    TermInfoIVARS *const skip_tinfo_ivars = TInfo_IVARS(skip_tinfo);
    LexiconWriter *const lex_writer       = ivars->lex_writer;
    SkipStepper   *const skip_stepper     = ivars->skip_stepper;
    SkipStepperIVARS *const skip_stepper_ivars
        = SkipStepper_IVARS(skip_stepper);
    int32_t        last_skip_doc          = 0;
    int64_t        last_skip_filepos      = 0;
    const int32_t  skip_interval
        = Arch_Skip_Interval(Schema_Get_Architecture(ivars->schema));

    // Prime heldover variables.
    RawPosting *posting
        = (RawPosting*)CERTIFY(PostPool_Fetch(self), RAWPOSTING);
    RawPostingIVARS *post_ivars = RawPost_IVARS(posting);
    CharBuf *last_term_text
        = CB_new_from_trusted_utf8(post_ivars->blob, post_ivars->content_len);
    const char *last_text_buf  = CB_Get_Ptr8(last_term_text);
    uint32_t    last_text_size = CB_Get_Size(last_term_text);
    SkipStepper_Set_ID_And_Filepos(skip_stepper, 0, 0);

    // Initialize sentinel to be used on the last iter, using an empty string
    // in order to make LexiconWriter Do The Right Thing.
    size_t sentinel_size = Class_Get_Obj_Alloc_Size(RAWPOSTING)
                           + 20;  // blob length + cushion
    char empty_string[] = "";
    RawPosting *sentinel = RawPost_new(alloca(sentinel_size), 0, 1,
                                       empty_string, 0);

    while (1) {
        bool same_text_as_last = true;

        if (posting == NULL) {
            // On the last iter, use an empty string to make LexiconWriter
            // DTRT.
            posting = sentinel;
            post_ivars = RawPost_IVARS(posting);
            same_text_as_last = false;
        }
        else {
            // Compare once.  The term text is addressed as `blob`, the same
            // way it is passed to the CharBuf routines in this function.
            if (post_ivars->content_len != last_text_size
                || memcmp(post_ivars->blob, last_text_buf, last_text_size) != 0
               ) {
                same_text_as_last = false;
            }
        }

        // If the term text changes, process the last term.
        if (!same_text_as_last) {
            // Hand off to LexiconWriter.
            LexWriter_Add_Term(lex_writer, (Obj*)last_term_text, tinfo);

            // Start each term afresh.
            TInfo_Reset(tinfo);
            PostWriter_Start_Term(post_writer, tinfo);

            // Init skip data in preparation for the next term.
            skip_stepper_ivars->doc_id  = 0;
            skip_stepper_ivars->filepos = tinfo_ivars->post_filepos;
            last_skip_doc         = 0;
            last_skip_filepos     = tinfo_ivars->post_filepos;

            // Remember the term_text so we can write string diffs.
            CB_Mimic_Utf8(last_term_text, post_ivars->blob,
                          post_ivars->content_len);
            last_text_buf  = CB_Get_Ptr8(last_term_text);
            last_text_size = CB_Get_Size(last_term_text);
        }

        // Bail on last iter before writing invalid posting data.
        if (posting == sentinel) { break; }

        // Write posting data.
        PostWriter_Write_Posting(post_writer, posting);

        // Doc freq lags by one iter.
        tinfo_ivars->doc_freq++;

        // Write skip data.  Because of the `same_text_as_last` requirement,
        // no skip record is written on the first posting of a term.
        if (skip_stream != NULL
            && same_text_as_last
            && tinfo_ivars->doc_freq % skip_interval == 0
            && tinfo_ivars->doc_freq != 0
           ) {
            // If first skip group, save skip stream pos for term info.
            if (tinfo_ivars->doc_freq == skip_interval) {
                tinfo_ivars->skip_filepos = OutStream_Tell(skip_stream);
            }
            // Write deltas: shift the stepper's current entry into the
            // "last" slot, then load the new doc id and the posting writer's
            // current file position (obtained through skip_tinfo).
            last_skip_doc               = skip_stepper_ivars->doc_id;
            last_skip_filepos           = skip_stepper_ivars->filepos;
            skip_stepper_ivars->doc_id  = post_ivars->doc_id;
            PostWriter_Update_Skip_Info(post_writer, skip_tinfo);
            skip_stepper_ivars->filepos = skip_tinfo_ivars->post_filepos;
            SkipStepper_Write_Record(skip_stepper, skip_stream,
                                     last_skip_doc, last_skip_filepos);
        }

        // Retrieve the next posting from the sort pool.  The posting just
        // written must NOT be DECREF'd here.
        posting = (RawPosting*)PostPool_Fetch(self);

        // Only derive ivars from a real posting.  On NULL (end of input) the
        // next iteration swaps in the sentinel and sets post_ivars itself, so
        // avoid computing an ivars pointer from a null object pointer.
        post_ivars = posting != NULL ? RawPost_IVARS(posting) : NULL;
    }

    // Clean up.
    DECREF(last_term_text);
    DECREF(skip_tinfo);
    DECREF(tinfo);
}