Exemplo n.º 1
0
void
LexWriter_start_field(LexiconWriter *self, i32_t field_num)
{
    CharBuf  *const seg_name = Seg_Get_Name(self->segment);
    Folder   *const folder   = self->folder;
    Snapshot *const snapshot = LexWriter_Get_Snapshot(self);

    /* Open outstreams. */
    CB_setf(self->dat_file,  "%o/lexicon-%i32.dat",  seg_name, field_num);
    CB_setf(self->ix_file,   "%o/lexicon-%i32.ix",   seg_name, field_num);
    CB_setf(self->ixix_file, "%o/lexicon-%i32.ixix", seg_name, field_num);
    Snapshot_Add_Entry(snapshot, self->dat_file);
    Snapshot_Add_Entry(snapshot, self->ix_file);
    Snapshot_Add_Entry(snapshot, self->ixix_file);
    self->dat_out  = Folder_Open_Out(folder, self->dat_file);
    self->ix_out   = Folder_Open_Out(folder, self->ix_file);
    self->ixix_out = Folder_Open_Out(folder, self->ixix_file);
    if (!self->dat_out)  { THROW("Can't open %o", self->dat_file); }
    if (!self->ix_out)   { THROW("Can't open %o", self->ix_file); }
    if (!self->ixix_out) { THROW("Can't open %o", self->ixix_file); }

    /* Initialize count and ix_count, TermInfo and last term text. */
    self->count    = 0;
    self->ix_count = 0;
    TInfo_Reset(self->last_tinfo);
    CB_Set_Size(self->last_text, 0);
}
Exemplo n.º 2
0
void
LexWriter_enter_temp_mode(LexiconWriter *self, OutStream *temp_outstream)
{
    /* Assign outstream. */
    if (self->dat_out != NULL)
        THROW("Can't enter temp mode (filename: %o) ", self->dat_file);
    self->dat_out = (OutStream*)INCREF(temp_outstream);

    /* Initialize count and ix_count, TermInfo and last term text. */
    self->count    = 0;
    self->ix_count = 0;
    TInfo_Reset(self->last_tinfo);
    CB_Set_Size(self->last_text, 0);

    /* Remember that we're in temp mode. */
    self->temp_mode = true;
}
Exemplo n.º 3
0
void
MatchTInfoStepper_Reset_IMP(MatchTermInfoStepper *self) {
    MatchTermInfoStepperIVARS *const ivars = MatchTInfoStepper_IVARS(self);
    TInfo_Reset((TermInfo*)ivars->value);
}
Exemplo n.º 4
0
static void
S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
                           OutStream *skip_stream) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    TermInfo      *const tinfo            = TInfo_new(0);
    TermInfo      *const skip_tinfo       = TInfo_new(0);
    TermInfoIVARS *const tinfo_ivars      = TInfo_IVARS(tinfo);
    TermInfoIVARS *const skip_tinfo_ivars = TInfo_IVARS(skip_tinfo);
    LexiconWriter *const lex_writer       = ivars->lex_writer;
    SkipStepper   *const skip_stepper     = ivars->skip_stepper;
    SkipStepperIVARS *const skip_stepper_ivars
        = SkipStepper_IVARS(skip_stepper);
    int32_t        last_skip_doc          = 0;
    int64_t        last_skip_filepos      = 0;
    const int32_t  skip_interval
        = Arch_Skip_Interval(Schema_Get_Architecture(ivars->schema));

    // Prime heldover variables.
    RawPosting *posting
        = (RawPosting*)CERTIFY(PostPool_Fetch(self), RAWPOSTING);
    RawPostingIVARS *post_ivars = RawPost_IVARS(posting);
    CharBuf *last_term_text
        = CB_new_from_trusted_utf8(post_ivars->blob, post_ivars->content_len);
    const char *last_text_buf  = CB_Get_Ptr8(last_term_text);
    uint32_t    last_text_size = CB_Get_Size(last_term_text);
    SkipStepper_Set_ID_And_Filepos(skip_stepper, 0, 0);

    // Initialize sentinel to be used on the last iter, using an empty string
    // in order to make LexiconWriter Do The Right Thing.
    size_t sentinel_size = Class_Get_Obj_Alloc_Size(RAWPOSTING)
                           + 20;  // blob length + cushion
    char empty_string[] = "";
    RawPosting *sentinel = RawPost_new(alloca(sentinel_size), 0, 1,
                                       empty_string, 0);

    while (1) {
        bool same_text_as_last = true;

        if (posting == NULL) {
            // On the last iter, use an empty string to make LexiconWriter
            // DTRT.
            posting = sentinel;
            post_ivars = RawPost_IVARS(posting);
            same_text_as_last = false;
        }
        else {
            // Compare once.
            if (post_ivars->content_len != last_text_size
                || memcmp(&post_ivars->blob, last_text_buf, last_text_size) != 0
               ) {
                same_text_as_last = false;
            }
        }

        // If the term text changes, process the last term.
        if (!same_text_as_last) {
            // Hand off to LexiconWriter.
            LexWriter_Add_Term(lex_writer, (Obj*)last_term_text, tinfo);

            // Start each term afresh.
            TInfo_Reset(tinfo);
            PostWriter_Start_Term(post_writer, tinfo);

            // Init skip data in preparation for the next term.
            skip_stepper_ivars->doc_id  = 0;
            skip_stepper_ivars->filepos = tinfo_ivars->post_filepos;
            last_skip_doc         = 0;
            last_skip_filepos     = tinfo_ivars->post_filepos;

            // Remember the term_text so we can write string diffs.
            CB_Mimic_Utf8(last_term_text, post_ivars->blob,
                          post_ivars->content_len);
            last_text_buf  = CB_Get_Ptr8(last_term_text);
            last_text_size = CB_Get_Size(last_term_text);
        }

        // Bail on last iter before writing invalid posting data.
        if (posting == sentinel) { break; }

        // Write posting data.
        PostWriter_Write_Posting(post_writer, posting);

        // Doc freq lags by one iter.
        tinfo_ivars->doc_freq++;

        //  Write skip data.
        if (skip_stream != NULL
            && same_text_as_last
            && tinfo_ivars->doc_freq % skip_interval == 0
            && tinfo_ivars->doc_freq != 0
           ) {
            // If first skip group, save skip stream pos for term info.
            if (tinfo_ivars->doc_freq == skip_interval) {
                tinfo_ivars->skip_filepos = OutStream_Tell(skip_stream);
            }
            // Write deltas.
            last_skip_doc               = skip_stepper_ivars->doc_id;
            last_skip_filepos           = skip_stepper_ivars->filepos;
            skip_stepper_ivars->doc_id  = post_ivars->doc_id;
            PostWriter_Update_Skip_Info(post_writer, skip_tinfo);
            skip_stepper_ivars->filepos = skip_tinfo_ivars->post_filepos;
            SkipStepper_Write_Record(skip_stepper, skip_stream,
                                     last_skip_doc, last_skip_filepos);
        }

        // Retrieve the next posting from the sort pool.
        // DECREF(posting);  // No!!  DON'T destroy!!!

        posting = (RawPosting*)PostPool_Fetch(self);
        post_ivars = RawPost_IVARS(posting);
    }

    // Clean up.
    DECREF(last_term_text);
    DECREF(skip_tinfo);
    DECREF(tinfo);
}
Exemplo n.º 5
0
void
MatchTInfoStepper_reset(MatchTermInfoStepper *self) {
    TInfo_Reset((TermInfo*)self->value);
}