예제 #1
0
파일: TermVector.c 프로젝트: rectang/lucy
TermVector*
TV_Deserialize_IMP(TermVector *self, InStream *instream) {
    String *field = Freezer_read_string(instream);
    String *text  = Freezer_read_string(instream);
    size_t  num_pos = InStream_Read_C64(instream);

    // Read positional data.
    int32_t *posits = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
    int32_t *starts = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
    int32_t *ends   = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
    for (size_t i = 0; i < num_pos; i++) {
        posits[i] = InStream_Read_C32(instream);
        starts[i] = InStream_Read_C32(instream);
        ends[i]   = InStream_Read_C32(instream);
    }
    I32Array *positions     = I32Arr_new_steal(posits, num_pos);
    I32Array *start_offsets = I32Arr_new_steal(starts, num_pos);
    I32Array *end_offsets   = I32Arr_new_steal(ends, num_pos);

    TV_init(self, field, text, positions, start_offsets, end_offsets);

    DECREF(positions);
    DECREF(start_offsets);
    DECREF(end_offsets);
    DECREF(text);
    DECREF(field);

    return self;
}
예제 #2
0
파일: LexIndex.c 프로젝트: kidaa/lucy
static void
S_read_entry(LexIndex *self) {
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    InStream *ix_in  = ivars->ix_in;
    TermInfo *const tinfo = ivars->tinfo;
    int64_t offset = (int64_t)NumUtil_decode_bigend_u64(ivars->offsets + ivars->tick);
    InStream_Seek(ix_in, offset);
    TermStepper_Read_Key_Frame(ivars->term_stepper, ix_in);
    int32_t doc_freq = InStream_Read_C32(ix_in);
    TInfo_Set_Doc_Freq(tinfo, doc_freq);
    TInfo_Set_Post_FilePos(tinfo, InStream_Read_C64(ix_in));
    int64_t skip_filepos = doc_freq >= ivars->skip_interval
                           ? InStream_Read_C64(ix_in)
                           : 0;
    TInfo_Set_Skip_FilePos(tinfo, skip_filepos);
    TInfo_Set_Lex_FilePos(tinfo, InStream_Read_C64(ix_in));
}
예제 #3
0
void
MatchTInfoStepper_read_delta(MatchTermInfoStepper *self, InStream *instream) {
    TermInfo *const tinfo = (TermInfo*)self->value;

    // Read doc freq.
    tinfo->doc_freq = InStream_Read_C32(instream);

    // Adjust postings file pointer.
    tinfo->post_filepos += InStream_Read_C64(instream);

    // Maybe read skip pointer.
    if (tinfo->doc_freq >= self->skip_interval) {
        tinfo->skip_filepos = InStream_Read_C64(instream);
    }
    else {
        tinfo->skip_filepos = 0;
    }
}
예제 #4
0
void
MatchTInfoStepper_Read_Delta_IMP(MatchTermInfoStepper *self, InStream *instream) {
    MatchTermInfoStepperIVARS *const ivars = MatchTInfoStepper_IVARS(self);
    TermInfoIVARS *const tinfo_ivars = TInfo_IVARS((TermInfo*)ivars->value);

    // Read doc freq.
    tinfo_ivars->doc_freq = InStream_Read_C32(instream);

    // Adjust postings file pointer.
    tinfo_ivars->post_filepos += InStream_Read_C64(instream);

    // Maybe read skip pointer.
    if (tinfo_ivars->doc_freq >= ivars->skip_interval) {
        tinfo_ivars->skip_filepos = InStream_Read_C64(instream);
    }
    else {
        tinfo_ivars->skip_filepos = 0;
    }
}
예제 #5
0
파일: DocReader.c 프로젝트: hernan604/lucy
HitDoc*
DefDocReader_Fetch_Doc_IMP(DefaultDocReader *self, int32_t doc_id) {
    DefaultDocReaderIVARS *const ivars = DefDocReader_IVARS(self);
    Schema   *const schema = ivars->schema;
    InStream *const dat_in = ivars->dat_in;
    InStream *const ix_in  = ivars->ix_in;
    Hash     *const fields = Hash_new(1);
    int64_t   start;
    uint32_t  num_fields;
    uint32_t  field_name_cap = 31;
    char     *field_name = (char*)MALLOCATE(field_name_cap + 1);

    // Get data file pointer from index, read number of fields.
    InStream_Seek(ix_in, (int64_t)doc_id * 8);
    start = InStream_Read_U64(ix_in);
    InStream_Seek(dat_in, start);
    num_fields = InStream_Read_C32(dat_in);

    // Decode stored data and build up the doc field by field.
    while (num_fields--) {
        uint32_t        field_name_len;
        Obj       *value;
        FieldType *type;

        // Read field name.
        field_name_len = InStream_Read_C32(dat_in);
        if (field_name_len > field_name_cap) {
            field_name_cap = field_name_len;
            field_name     = (char*)REALLOCATE(field_name,
                                                    field_name_cap + 1);
        }
        InStream_Read_Bytes(dat_in, field_name, field_name_len);

        // Find the Field's FieldType.
        StackString *field_name_str
            = SSTR_WRAP_UTF8(field_name, field_name_len);
        type = Schema_Fetch_Type(schema, (String*)field_name_str);

        // Read the field value.
        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                    uint32_t value_len = InStream_Read_C32(dat_in);
                    char *buf = (char*)MALLOCATE(value_len + 1);
                    InStream_Read_Bytes(dat_in, buf, value_len);
                    buf[value_len] = '\0'; 
                    value = (Obj*)Str_new_steal_utf8(buf, value_len);
                    break;
                }
            case FType_BLOB: {
                    uint32_t value_len = InStream_Read_C32(dat_in);
                    char *buf = (char*)MALLOCATE(value_len);
                    InStream_Read_Bytes(dat_in, buf, value_len);
                    value = (Obj*)BB_new_steal_bytes(
                                buf, value_len, value_len);
                    break;
                }
            case FType_FLOAT32:
                value = (Obj*)Float32_new(
                                InStream_Read_F32(dat_in));
                break;
            case FType_FLOAT64:
                value = (Obj*)Float64_new(
                                InStream_Read_F64(dat_in));
                break;
            case FType_INT32:
                value = (Obj*)Int32_new(
                                (int32_t)InStream_Read_C32(dat_in));
                break;
            case FType_INT64:
                value = (Obj*)Int64_new(
                                (int64_t)InStream_Read_C64(dat_in));
                break;
            default:
                value = NULL;
                THROW(ERR, "Unrecognized type: %o", type);
        }

        // Store the value.
        Hash_Store_Utf8(fields, field_name, field_name_len, value);
    }
    FREEMEM(field_name);

    HitDoc *retval = HitDoc_new(fields, doc_id, 0.0);
    DECREF(fields);
    return retval;
}
예제 #6
0
파일: Freezer.c 프로젝트: rectang/lucy
Obj*
Freezer_deserialize(Obj *obj, InStream *instream) {
    if (Obj_is_a(obj, STRING)) {
        obj = (Obj*)Freezer_deserialize_string((String*)obj, instream);
    }
    else if (Obj_is_a(obj, BLOB)) {
        obj = (Obj*)Freezer_deserialize_blob((Blob*)obj, instream);
    }
    else if (Obj_is_a(obj, VECTOR)) {
        obj = (Obj*)Freezer_deserialize_varray((Vector*)obj, instream);
    }
    else if (Obj_is_a(obj, HASH)) {
        obj = (Obj*)Freezer_deserialize_hash((Hash*)obj, instream);
    }
    else if (Obj_is_a(obj, INTEGER)) {
        int64_t value = (int64_t)InStream_Read_C64(instream);
        obj = (Obj*)Int_init((Integer*)obj, value);
    }
    else if (Obj_is_a(obj, FLOAT)) {
        double value = InStream_Read_F64(instream);
        obj = (Obj*)Float_init((Float*)obj, value);
    }
    else if (Obj_is_a(obj, BOOLEAN)) {
        bool value = !!InStream_Read_U8(instream);
        Obj *result = value ? INCREF(CFISH_TRUE) : INCREF(CFISH_FALSE);
        // FIXME: This DECREF is essentially a no-op causing a
        // memory leak.
        DECREF(obj);
        obj = result;
    }
    else if (Obj_is_a(obj, QUERY)) {
        obj = (Obj*)Query_Deserialize((Query*)obj, instream);
    }
    else if (Obj_is_a(obj, DOC)) {
        obj = (Obj*)Doc_Deserialize((Doc*)obj, instream);
    }
    else if (Obj_is_a(obj, DOCVECTOR)) {
        obj = (Obj*)DocVec_Deserialize((DocVector*)obj, instream);
    }
    else if (Obj_is_a(obj, TERMVECTOR)) {
        obj = (Obj*)TV_Deserialize((TermVector*)obj, instream);
    }
    else if (Obj_is_a(obj, SIMILARITY)) {
        obj = (Obj*)Sim_Deserialize((Similarity*)obj, instream);
    }
    else if (Obj_is_a(obj, MATCHDOC)) {
        obj = (Obj*)MatchDoc_Deserialize((MatchDoc*)obj, instream);
    }
    else if (Obj_is_a(obj, TOPDOCS)) {
        obj = (Obj*)TopDocs_Deserialize((TopDocs*)obj, instream);
    }
    else if (Obj_is_a(obj, SORTSPEC)) {
        obj = (Obj*)SortSpec_Deserialize((SortSpec*)obj, instream);
    }
    else if (Obj_is_a(obj, SORTRULE)) {
        obj = (Obj*)SortRule_Deserialize((SortRule*)obj, instream);
    }
    else {
        THROW(ERR, "Don't know how to deserialize a %o",
              Obj_get_class_name(obj));
    }

    return obj;
}