static void
S_add_last_term_to_ix(LexiconWriter *self, char *last_text, size_t last_size)
{
    OutStream *const ix_out     = self->ix_out;
    OutStream *const ixix_out   = self->ixix_out;
    TermInfo  *const last_tinfo = self->last_tinfo;

    /* Write file pointer to index record. */
    OutStream_Write_U64(ixix_out, OutStream_Tell(ix_out));

    /* Write term text. */
    OutStream_Write_C32(ix_out, last_size);
    OutStream_Write_Bytes(ix_out, last_text, last_size);
    
    /* Write doc_freq. */
    OutStream_Write_C32(ix_out, last_tinfo->doc_freq);

    /* Write postings file pointer. */
    OutStream_Write_C64(ix_out, last_tinfo->post_filepos);

    /* Write skip file pointer (maybe). */
    if (last_tinfo->doc_freq >= self->skip_interval) {
        OutStream_Write_C64(ix_out, last_tinfo->skip_filepos);
    }

    /* Write file pointer to main record. */
    OutStream_Write_C64(ix_out, OutStream_Tell(self->dat_out));

    /* Keep track of how many terms have been added to lexicon.ix. */
    self->ix_count++;
}
Example #2
0
void
Freezer_serialize_string(String *string, OutStream *outstream) {
    size_t      size = Str_Get_Size(string);
    const char *buf  = Str_Get_Ptr8(string);
    OutStream_Write_C64(outstream, size);
    OutStream_Write_Bytes(outstream, buf, size);
}
Example #3
0
void
Freezer_serialize(Obj *obj, OutStream *outstream) {
    if (Obj_is_a(obj, STRING)) {
        Freezer_serialize_string((String*)obj, outstream);
    }
    else if (Obj_is_a(obj, BLOB)) {
        Freezer_serialize_blob((Blob*)obj, outstream);
    }
    else if (Obj_is_a(obj, VECTOR)) {
        Freezer_serialize_varray((Vector*)obj, outstream);
    }
    else if (Obj_is_a(obj, HASH)) {
        Freezer_serialize_hash((Hash*)obj, outstream);
    }
    else if (Obj_is_a(obj, INTEGER)) {
        int64_t val = Int_Get_Value((Integer*)obj);
        OutStream_Write_C64(outstream, (uint64_t)val);
    }
    else if (Obj_is_a(obj, FLOAT)) {
        double val = Float_Get_Value((Float*)obj);
        OutStream_Write_F64(outstream, val);
    }
    else if (Obj_is_a(obj, BOOLEAN)) {
        bool val = Bool_Get_Value((Boolean*)obj);
        OutStream_Write_U8(outstream, (uint8_t)val);
    }
    else if (Obj_is_a(obj, QUERY)) {
        Query_Serialize((Query*)obj, outstream);
    }
    else if (Obj_is_a(obj, DOC)) {
        Doc_Serialize((Doc*)obj, outstream);
    }
    else if (Obj_is_a(obj, DOCVECTOR)) {
        DocVec_Serialize((DocVector*)obj, outstream);
    }
    else if (Obj_is_a(obj, TERMVECTOR)) {
        TV_Serialize((TermVector*)obj, outstream);
    }
    else if (Obj_is_a(obj, SIMILARITY)) {
        Sim_Serialize((Similarity*)obj, outstream);
    }
    else if (Obj_is_a(obj, MATCHDOC)) {
        MatchDoc_Serialize((MatchDoc*)obj, outstream);
    }
    else if (Obj_is_a(obj, TOPDOCS)) {
        TopDocs_Serialize((TopDocs*)obj, outstream);
    }
    else if (Obj_is_a(obj, SORTSPEC)) {
        SortSpec_Serialize((SortSpec*)obj, outstream);
    }
    else if (Obj_is_a(obj, SORTRULE)) {
        SortRule_Serialize((SortRule*)obj, outstream);
    }
    else {
        THROW(ERR, "Don't know how to serialize a %o",
              Obj_get_class_name(obj));
    }
}
Example #4
0
void
MatchTInfoStepper_write_key_frame(MatchTermInfoStepper *self,
                                  OutStream *outstream, Obj *value) {
    TermInfo *tinfo    = (TermInfo*)CERTIFY(value, TERMINFO);
    int32_t   doc_freq = TInfo_Get_Doc_Freq(tinfo);

    // Write doc_freq.
    OutStream_Write_C32(outstream, doc_freq);

    // Write postings file pointer.
    OutStream_Write_C64(outstream, tinfo->post_filepos);

    // Write skip file pointer (maybe).
    if (doc_freq >= self->skip_interval) {
        OutStream_Write_C64(outstream, tinfo->skip_filepos);
    }

    TInfo_Mimic((TermInfo*)self->value, (Obj*)tinfo);
}
Example #5
0
void
MatchTInfoStepper_Write_Key_Frame_IMP(MatchTermInfoStepper *self,
                                      OutStream *outstream, Obj *value) {
    MatchTermInfoStepperIVARS *const ivars = MatchTInfoStepper_IVARS(self);
    TermInfo *tinfo    = (TermInfo*)CERTIFY(value, TERMINFO);
    int32_t   doc_freq = TInfo_Get_Doc_Freq(tinfo);
    TermInfoIVARS *const tinfo_ivars = TInfo_IVARS((TermInfo*)value);

    // Write doc_freq.
    OutStream_Write_C32(outstream, doc_freq);

    // Write postings file pointer.
    OutStream_Write_C64(outstream, tinfo_ivars->post_filepos);

    // Write skip file pointer (maybe).
    if (doc_freq >= ivars->skip_interval) {
        OutStream_Write_C64(outstream, tinfo_ivars->skip_filepos);
    }

    TInfo_Mimic((TermInfo*)ivars->value, (Obj*)tinfo);
}
Example #6
0
void
MatchTInfoStepper_Write_Delta_IMP(MatchTermInfoStepper *self,
                                  OutStream *outstream, Obj *value) {
    MatchTermInfoStepperIVARS *const ivars = MatchTInfoStepper_IVARS(self);
    TermInfo *tinfo      = (TermInfo*)CERTIFY(value, TERMINFO);
    TermInfo *last_tinfo = (TermInfo*)ivars->value;
    int32_t   doc_freq   = TInfo_Get_Doc_Freq(tinfo);
    int64_t   post_delta = TInfo_IVARS(tinfo)->post_filepos
                           - TInfo_IVARS(last_tinfo)->post_filepos;

    // Write doc_freq.
    OutStream_Write_C32(outstream, doc_freq);

    // Write postings file pointer delta.
    OutStream_Write_C64(outstream, post_delta);

    // Write skip file pointer (maybe).
    if (doc_freq >= ivars->skip_interval) {
        OutStream_Write_C64(outstream, TInfo_IVARS(tinfo)->skip_filepos);
    }

    TInfo_Mimic((TermInfo*)ivars->value, (Obj*)tinfo);
}
Example #7
0
static void
S_add_last_term_to_ix(LexiconWriter *self)
{
    // Write file pointer to index record. 
    OutStream_Write_I64(self->ixix_out, OutStream_Tell(self->ix_out));

    // Write term and file pointer to main record.  Track count of terms added
    // to ix.
    TermStepper_Write_Key_Frame(self->term_stepper,
        self->ix_out, TermStepper_Get_Value(self->term_stepper));
    TermStepper_Write_Key_Frame(self->tinfo_stepper, 
        self->ix_out, TermStepper_Get_Value(self->tinfo_stepper));
    OutStream_Write_C64(self->ix_out, OutStream_Tell(self->dat_out));
    self->ix_count++;
}
Example #8
0
void
TV_Serialize_IMP(TermVector *self, OutStream *target) {
    TermVectorIVARS *const ivars = TV_IVARS(self);
    int32_t *posits = I32Arr_IVARS(ivars->positions)->ints;
    int32_t *starts = I32Arr_IVARS(ivars->start_offsets)->ints;
    int32_t *ends   = I32Arr_IVARS(ivars->start_offsets)->ints;

    Freezer_serialize_string(ivars->field, target);
    Freezer_serialize_string(ivars->text, target);
    OutStream_Write_C64(target, ivars->num_pos);

    for (size_t i = 0; i < ivars->num_pos; i++) {
        OutStream_Write_C32(target, posits[i]);
        OutStream_Write_C32(target, starts[i]);
        OutStream_Write_C32(target, ends[i]);
    }
}
Example #9
0
void
DocWriter_Add_Inverted_Doc_IMP(DocWriter *self, Inverter *inverter,
                               int32_t doc_id) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    OutStream *dat_out    = S_lazy_init(self);
    OutStream *ix_out     = ivars->ix_out;
    uint32_t   num_stored = 0;
    int64_t    start      = OutStream_Tell(dat_out);
    int64_t    expected   = OutStream_Tell(ix_out) / 8;

    // Verify doc id.
    if (doc_id != expected) {
        THROW(ERR, "Expected doc id %i64 but got %i32", expected, doc_id);
    }

    // Write the number of stored fields.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (FType_Stored(type)) { num_stored++; }
    }
    OutStream_Write_C32(dat_out, num_stored);

    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        // Only store fields marked as "stored".
        FieldType *type = Inverter_Get_Type(inverter);
        if (FType_Stored(type)) {
            String *field = Inverter_Get_Field_Name(inverter);
            Obj *value = Inverter_Get_Value(inverter);
            Freezer_serialize_string(field, dat_out);
            switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
                case FType_TEXT: {
                    const char *buf  = Str_Get_Ptr8((String*)value);
                    size_t      size = Str_Get_Size((String*)value);
                    OutStream_Write_C32(dat_out, size);
                    OutStream_Write_Bytes(dat_out, buf, size);
                    break;
                }
                case FType_BLOB: {
                    char   *buf  = BB_Get_Buf((ByteBuf*)value);
                    size_t  size = BB_Get_Size((ByteBuf*)value);
                    OutStream_Write_C32(dat_out, size);
                    OutStream_Write_Bytes(dat_out, buf, size);
                    break;
                }
                case FType_INT32: {
                    int32_t val = Int32_Get_Value((Integer32*)value);
                    OutStream_Write_C32(dat_out, val);
                    break;
                }
                case FType_INT64: {
                    int64_t val = Int64_Get_Value((Integer64*)value);
                    OutStream_Write_C64(dat_out, val);
                    break;
                }
                case FType_FLOAT32: {
                    float val = Float32_Get_Value((Float32*)value);
                    OutStream_Write_F32(dat_out, val);
                    break;
                }
                case FType_FLOAT64: {
                    double val = Float64_Get_Value((Float64*)value);
                    OutStream_Write_F64(dat_out, val);
                    break;
                }
                default:
                    THROW(ERR, "Unrecognized type: %o", type);
            }
        }
    }

    // Write file pointer.
    OutStream_Write_I64(ix_out, start);
}