Esempio n. 1
0
/* Set up a LexiconWriter.  Runs the DataWriter initializer, copies the
 * index and skip intervals from the Architecture of the PolyReader's
 * Schema, and gives every member a starting value.  Returns `self`. */
LexiconWriter*
LexWriter_init(LexiconWriter *self, Snapshot *snapshot, Segment *segment,
               PolyReader *polyreader)
{
    Schema       *const reader_schema = PolyReader_Get_Schema(polyreader);
    Architecture *const architecture  = Schema_Get_Architecture(reader_schema);

    DataWriter_init((DataWriter*)self, snapshot, segment, polyreader);

    /* Intervals are taken from the Architecture. */
    self->index_interval = Arch_Index_Interval(architecture);
    self->skip_interval  = Arch_Skip_Interval(architecture);

    /* Streams, counters and flags all start out empty. */
    self->dat_out   = NULL;
    self->ix_out    = NULL;
    self->ixix_out  = NULL;
    self->stepper   = NULL;
    self->ix_count  = 0;
    self->count     = 0;
    self->temp_mode = false;

    /* Helper objects created here and stored on the writer. */
    self->last_tinfo = TInfo_new(0,0,0,0);
    self->last_text  = CB_new(40);
    self->dat_file   = CB_new(30);
    self->ix_file    = CB_new(30);
    self->ixix_file  = CB_new(30);
    self->counts     = Hash_new(0);
    self->ix_counts  = Hash_new(0);

    /* The stepper needs the skip interval assigned above. */
    self->stepper = LexStepper_new((CharBuf*)&EMPTY, self->skip_interval);

    return self;
}
Esempio n. 2
0
static void
S_try_init_components(void *context) {
    SegReader *self = (SegReader*)context;
    Schema *schema = SegReader_Get_Schema(self);
    Architecture *arch = Schema_Get_Architecture(schema);
    Arch_Init_Seg_Reader(arch, self);
}
Esempio n. 3
0
// Set up a LexiconWriter.  Runs the DataWriter initializer, copies the
// index and skip intervals from the Schema's Architecture, and gives every
// member a starting value.  Returns `self`.
LexiconWriter*
LexWriter_init(LexiconWriter *self, Schema *schema, Snapshot *snapshot,
               Segment *segment, PolyReader *polyreader)
{
    Architecture *const architecture = Schema_Get_Architecture(schema);

    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);

    // Intervals are taken from the Architecture.
    self->index_interval = Arch_Index_Interval(architecture);
    self->skip_interval  = Arch_Skip_Interval(architecture);

    // Streams, counters and flags all start out empty.
    self->dat_out      = NULL;
    self->ix_out       = NULL;
    self->ixix_out     = NULL;
    self->term_stepper = NULL;
    self->ix_count     = 0;
    self->count        = 0;
    self->temp_mode    = false;

    // Helper objects created here and stored on the writer.
    self->dat_file      = CB_new(30);
    self->ix_file       = CB_new(30);
    self->ixix_file     = CB_new(30);
    self->counts        = Hash_new(0);
    self->ix_counts     = Hash_new(0);
    self->tinfo_stepper = (TermStepper*)MatchTInfoStepper_new(schema);

    return self;
}
Esempio n. 4
0
// Initialize a MatchTermInfoStepper: run the TermStepper initializer, record
// the skip interval reported by the Schema's Architecture, and install a
// fresh TermInfo as the stepper's value.  Returns `self`.
MatchTermInfoStepper*
MatchTInfoStepper_init(MatchTermInfoStepper *self, Schema *schema) {
    Architecture *const architecture = Schema_Get_Architecture(schema);

    TermStepper_init((TermStepper*)self);

    self->skip_interval = Arch_Skip_Interval(architecture);
    self->value         = (Obj*)TInfo_new(0);

    return self;
}
Esempio n. 5
0
// Initialize a SegWriter: run the DataWriter initializer, create the
// by-API hash, the Inverter and the writers vector, then let the Schema's
// Architecture finish the setup through Arch_Init_Seg_Writer().
// Returns `self`.
SegWriter*
SegWriter_init(SegWriter *self, Schema *schema, Snapshot *snapshot,
               Segment *segment, PolyReader *polyreader) {
    Architecture *const architecture = Schema_Get_Architecture(schema);

    DataWriter_init((DataWriter*)self, schema, snapshot, segment, polyreader);

    SegWriterIVARS *const members = SegWriter_IVARS(self);
    members->by_api   = Hash_new(0);
    members->inverter = Inverter_new(schema, segment);
    members->writers  = Vec_new(16);

    // Runs last, after the containers above have been created.
    Arch_Init_Seg_Writer(architecture, self);

    return self;
}
Esempio n. 6
0
// Initialize a LexIndex for `field` within `segment`.
//
// Builds the "<seg>/lexicon-<field_num>.ixix" and ".ix" file names, looks up
// the field's type in `schema`, opens both files from `folder`, and records
// the index/skip intervals plus a pointer to the offsets read from the .ixix
// stream.
//
// Every failure path releases the two temporary file names and `self`
// before throwing: an unknown field throws a new "Unknown field" error,
// while a failed open rethrows the error fetched from Err_get_error().
// Returns `self` on success.
LexIndex*
LexIndex_init(LexIndex *self, Schema *schema, Folder *folder,
              Segment *segment, String *field) {
    int32_t  field_number = Seg_Field_Num(segment, field);
    String  *segment_name = Seg_Get_Name(segment);
    String  *ixix_path
        = Str_newf("%o/lexicon-%i32.ixix", segment_name, field_number);
    String  *ix_path
        = Str_newf("%o/lexicon-%i32.ix", segment_name, field_number);
    Architecture *architecture = Schema_Get_Architecture(schema);

    // Parent initializer first, then the simple members.
    Lex_init((Lexicon*)self, field);
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    ivars->tinfo = TInfo_new(0);
    ivars->tick  = 0;

    // The field must be known to the schema.
    ivars->field_type = Schema_Fetch_Type(schema, field);
    if (ivars->field_type == NULL) {
        // Build the message while `field` is still usable, then clean up.
        String *mess = MAKE_MESS("Unknown field: '%o'", field);
        DECREF(ix_path);
        DECREF(ixix_path);
        DECREF(self);
        Err_throw_mess(ERR, mess);
    }
    ivars->field_type   = (FieldType*)INCREF(ivars->field_type);
    ivars->term_stepper = FType_Make_Term_Stepper(ivars->field_type);

    // Open the .ixix stream.
    ivars->ixix_in = Folder_Open_In(folder, ixix_path);
    if (ivars->ixix_in == NULL) {
        // Capture the pending error before any cleanup runs.
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_path);
        DECREF(ixix_path);
        DECREF(self);
        RETHROW(error);
    }

    // Open the .ix stream.
    ivars->ix_in = Folder_Open_In(folder, ix_path);
    if (ivars->ix_in == NULL) {
        // Capture the pending error before any cleanup runs.
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_path);
        DECREF(ixix_path);
        DECREF(self);
        RETHROW(error);
    }

    ivars->index_interval = Arch_Index_Interval(architecture);
    ivars->skip_interval  = Arch_Skip_Interval(architecture);

    // The .ixix stream is treated as an array of int64_t offsets: the
    // entry count is its length divided by sizeof(int64_t).
    ivars->size
        = (int32_t)(InStream_Length(ivars->ixix_in) / sizeof(int64_t));
    ivars->offsets
        = (const int64_t*)InStream_Buf(ivars->ixix_in,
                                       (size_t)InStream_Length(ivars->ixix_in));

    // The file names were only needed for opening the streams.
    DECREF(ixix_path);
    DECREF(ix_path);

    return self;
}
Esempio n. 7
0
/* Initialize a LexIndex for `field` within `segment`.
 *
 * Builds the "<seg>/lexicon-<field_num>.ixix" and ".ix" file names, looks
 * up the field's type in `schema`, opens both files from `folder`, and
 * records the index/skip intervals together with pointers into the buffers
 * obtained from the two streams.
 *
 * On an unknown field, or when either file cannot be opened, the temporary
 * file names and `self` are released and an error message is thrown.
 * Returns `self` on success. */
LexIndex*
LexIndex_init(LexIndex *self, Schema *schema, Folder *folder,
              Segment *segment, const CharBuf *field)
{
    i32_t    field_number = Seg_Field_Num(segment, field);
    CharBuf *segment_name = Seg_Get_Name(segment);
    CharBuf *ixix_path
        = CB_newf("%o/lexicon-%i32.ixix", segment_name, field_number);
    CharBuf *ix_path
        = CB_newf("%o/lexicon-%i32.ix", segment_name, field_number);
    Architecture *architecture = Schema_Get_Architecture(schema);

    /* Simple members. */
    self->tick  = 0;
    self->term  = ViewCB_new_from_trusted_utf8(NULL, 0);
    self->tinfo = TInfo_new(0,0,0,0);

    /* The field must be known to the schema. */
    self->field_type = Schema_Fetch_Type(schema, field);
    if (self->field_type == NULL) {
        /* Build the message first, then clean up. */
        CharBuf *mess = MAKE_MESS("Unknown field: '%o'", field);
        DECREF(ix_path);
        DECREF(ixix_path);
        DECREF(self);
        Err_throw_mess(mess);
    }
    INCREF(self->field_type);

    /* Both streams are opened before either result is checked. */
    self->ixix_in = Folder_Open_In(folder, ixix_path);
    self->ix_in   = Folder_Open_In(folder, ix_path);
    if (!(self->ixix_in && self->ix_in)) {
        /* The message uses both names, so format it before releasing. */
        CharBuf *mess =
             MAKE_MESS("Can't open either %o or %o", ix_path, ixix_path);
        DECREF(ix_path);
        DECREF(ixix_path);
        DECREF(self);
        Err_throw_mess(mess);
    }

    self->index_interval = Arch_Index_Interval(architecture);
    self->skip_interval  = Arch_Skip_Interval(architecture);

    /* The .ixix stream is treated as an array of i64_t offsets: the entry
     * count is its length divided by sizeof(i64_t). */
    self->size
        = (i32_t)(InStream_Length(self->ixix_in) / sizeof(i64_t));
    self->offsets
        = (i64_t*)InStream_Buf(self->ixix_in,
                               (size_t)InStream_Length(self->ixix_in));

    /* `data` .. `limit` brackets the full length of the .ix stream. */
    self->data  = InStream_Buf(self->ix_in, InStream_Length(self->ix_in));
    self->limit = self->data + InStream_Length(self->ix_in);

    /* The file names were only needed for opening the streams. */
    DECREF(ixix_path);
    DECREF(ix_path);

    return self;
}
Esempio n. 8
0
/* Initialize a PostingPool for `field`.
 *
 * Runs the SortExRun initializer, resets all bookkeeping members, creates
 * a LexStepper using the skip interval from the Schema's Architecture,
 * takes references to `schema` and `mem_pool`, clones `field`, and stores
 * a clone of the Posting plus a reference to the FieldType that the schema
 * reports for the field.  Returns `self`. */
PostingPool*
PostPool_init(PostingPool *self, Schema *schema,
              const CharBuf *field, MemoryPool *mem_pool)
{
    Architecture *architecture = Schema_Get_Architecture(schema);

    SortExRun_init((SortExRun*)self);

    /* Streams and buffers that do not exist yet. */
    self->lex_instream  = NULL;
    self->post_instream = NULL;
    self->doc_map       = NULL;
    self->scratch       = NULL;
    self->scratch_cap   = 0;

    /* Start positions begin at I64_MAX and end positions at zero. */
    self->lex_start  = I64_MAX;
    self->post_start = I64_MAX;
    self->lex_end    = 0;
    self->post_end   = 0;

    /* Flags and counters. */
    self->flipped     = false;
    self->from_seg    = false;
    self->mem_thresh  = 0;
    self->doc_base    = 0;
    self->last_doc_id = 0;
    self->post_count  = 0;

    self->lex_stepper
        = LexStepper_new(field, Arch_Skip_Interval(architecture));

    /* Keep references to (or a copy of) the constructor arguments. */
    self->schema   = (Schema*)INCREF(schema);
    self->mem_pool = (MemoryPool*)INCREF(mem_pool);
    self->field    = CB_Clone(field);

    /* The schema's Posting for this field is replaced by a clone of it. */
    self->posting = Schema_Fetch_Posting(schema, field);
    self->posting = (Posting*)Post_Clone(self->posting);
    self->type    = (FieldType*)INCREF(Schema_Fetch_Type(schema, field));
    self->compare = PostPoolQ_compare_rawp;

    return self;
}
Esempio n. 9
0
// Drain every RawPosting from the pool, writing posting data through
// `post_writer`, one lexicon entry per distinct term through the pool's
// LexiconWriter, and -- when `skip_stream` is non-NULL -- a skip record
// every `skip_interval` postings within a term.
//
// The term text of the previous posting is "held over" so a change of term
// can be detected; the finished term is handed to the LexiconWriter at that
// point.  Because the last term is only flushed when a *different* term
// shows up, a stack-allocated sentinel posting with empty text is
// substituted once the pool runs dry.
//
// The first fetch is wrapped in CERTIFY(); what CERTIFY() does with an
// empty pool is not visible here.
static void
S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
                           OutStream *skip_stream) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    TermInfo      *const tinfo            = TInfo_new(0);
    TermInfo      *const skip_tinfo       = TInfo_new(0);
    TermInfoIVARS *const tinfo_ivars      = TInfo_IVARS(tinfo);
    TermInfoIVARS *const skip_tinfo_ivars = TInfo_IVARS(skip_tinfo);
    LexiconWriter *const lex_writer       = ivars->lex_writer;
    SkipStepper   *const skip_stepper     = ivars->skip_stepper;
    SkipStepperIVARS *const skip_stepper_ivars
        = SkipStepper_IVARS(skip_stepper);
    int32_t        last_skip_doc          = 0;
    int64_t        last_skip_filepos      = 0;
    const int32_t  skip_interval
        = Arch_Skip_Interval(Schema_Get_Architecture(ivars->schema));

    // Prime heldover variables.
    RawPosting *posting
        = (RawPosting*)CERTIFY(PostPool_Fetch(self), RAWPOSTING);
    RawPostingIVARS *post_ivars = RawPost_IVARS(posting);
    CharBuf *last_term_text
        = CB_new_from_trusted_utf8(post_ivars->blob, post_ivars->content_len);
    const char *last_text_buf  = CB_Get_Ptr8(last_term_text);
    uint32_t    last_text_size = CB_Get_Size(last_term_text);
    SkipStepper_Set_ID_And_Filepos(skip_stepper, 0, 0);

    // Initialize sentinel to be used on the last iter, using an empty string
    // in order to make LexiconWriter Do The Right Thing.
    size_t sentinel_size = Class_Get_Obj_Alloc_Size(RAWPOSTING)
                           + 20;  // blob length + cushion
    char empty_string[] = "";
    RawPosting *sentinel = RawPost_new(alloca(sentinel_size), 0, 1,
                                       empty_string, 0);

    while (1) {
        bool same_text_as_last = true;

        if (posting == NULL) {
            // On the last iter, use an empty string to make LexiconWriter
            // DTRT.
            posting = sentinel;
            post_ivars = RawPost_IVARS(posting);
            same_text_as_last = false;
        }
        else {
            // Compare once.  The blob is passed the same way as in the
            // CB_new_from_trusted_utf8() and CB_Mimic_Utf8() calls.
            if (post_ivars->content_len != last_text_size
                || memcmp(post_ivars->blob, last_text_buf, last_text_size) != 0
               ) {
                same_text_as_last = false;
            }
        }

        // If the term text changes, process the last term.
        if (!same_text_as_last) {
            // Hand off to LexiconWriter.
            LexWriter_Add_Term(lex_writer, (Obj*)last_term_text, tinfo);

            // Start each term afresh.
            TInfo_Reset(tinfo);
            PostWriter_Start_Term(post_writer, tinfo);

            // Init skip data in preparation for the next term.
            skip_stepper_ivars->doc_id  = 0;
            skip_stepper_ivars->filepos = tinfo_ivars->post_filepos;
            last_skip_doc         = 0;
            last_skip_filepos     = tinfo_ivars->post_filepos;

            // Remember the term_text so we can write string diffs.
            CB_Mimic_Utf8(last_term_text, post_ivars->blob,
                          post_ivars->content_len);
            last_text_buf  = CB_Get_Ptr8(last_term_text);
            last_text_size = CB_Get_Size(last_term_text);
        }

        // Bail on last iter before writing invalid posting data.
        if (posting == sentinel) { break; }

        // Write posting data.
        PostWriter_Write_Posting(post_writer, posting);

        // Doc freq lags by one iter.
        tinfo_ivars->doc_freq++;

        //  Write skip data.
        if (skip_stream != NULL
            && same_text_as_last
            && tinfo_ivars->doc_freq % skip_interval == 0
            && tinfo_ivars->doc_freq != 0
           ) {
            // If first skip group, save skip stream pos for term info.
            if (tinfo_ivars->doc_freq == skip_interval) {
                tinfo_ivars->skip_filepos = OutStream_Tell(skip_stream);
            }
            // Write deltas.
            last_skip_doc               = skip_stepper_ivars->doc_id;
            last_skip_filepos           = skip_stepper_ivars->filepos;
            skip_stepper_ivars->doc_id  = post_ivars->doc_id;
            PostWriter_Update_Skip_Info(post_writer, skip_tinfo);
            skip_stepper_ivars->filepos = skip_tinfo_ivars->post_filepos;
            SkipStepper_Write_Record(skip_stepper, skip_stream,
                                     last_skip_doc, last_skip_filepos);
        }

        // Retrieve the next posting from the sort pool.  The previous
        // posting is deliberately not DECREF'd here.
        posting = (RawPosting*)PostPool_Fetch(self);

        // The fetch yields NULL once the pool is exhausted.  Don't derive
        // an ivars pointer from NULL; the top of the loop swaps in the
        // sentinel and reloads post_ivars before it is read again.
        post_ivars = posting ? RawPost_IVARS(posting) : NULL;
    }

    // Clean up.
    DECREF(last_term_text);
    DECREF(skip_tinfo);
    DECREF(tinfo);
}