Beispiel #1
0
DocVector*
DefHLReader_fetch_doc_vec(DefaultHighlightReader *self, int32_t doc_id)
{
    DocVector *doc_vec = DocVec_new();
    int64_t file_pos;
    uint32_t num_fields;

    InStream_Seek(self->ix_in, doc_id * 8);
    file_pos = InStream_Read_I64(self->ix_in);
    InStream_Seek(self->dat_in, file_pos);

    num_fields = InStream_Read_C32(self->dat_in);
    while (num_fields--) {
        CharBuf *field = CB_deserialize(NULL, self->dat_in);
        ByteBuf *field_buf  = BB_deserialize(NULL, self->dat_in);
        DocVec_Add_Field_Buf(doc_vec, field, field_buf);
        DECREF(field_buf);
        DECREF(field);
    }

    return doc_vec;
}
Beispiel #2
0
DocVector*
DefHLReader_Fetch_Doc_Vec_IMP(DefaultHighlightReader *self, int32_t doc_id) {
    DefaultHighlightReaderIVARS *const ivars = DefHLReader_IVARS(self);
    InStream *const ix_in  = ivars->ix_in;
    InStream *const dat_in = ivars->dat_in;
    DocVector *doc_vec = DocVec_new();

    InStream_Seek(ix_in, doc_id * 8);
    int64_t file_pos = InStream_Read_I64(ix_in);
    InStream_Seek(dat_in, file_pos);

    uint32_t num_fields = InStream_Read_CU32(dat_in);
    while (num_fields--) {
        String *field = Freezer_read_string(dat_in);
        Blob *field_buf = Freezer_read_blob(dat_in);
        DocVec_Add_Field_Buf(doc_vec, field, field_buf);
        DECREF(field_buf);
        DECREF(field);
    }

    return doc_vec;
}
Beispiel #3
0
DocVector*
DefHLReader_fetch_doc_vec(DefaultHighlightReader *self, int32_t doc_id) {
    InStream *const ix_in  = self->ix_in;
    InStream *const dat_in = self->dat_in;
    DocVector *doc_vec = DocVec_new();

    InStream_Seek(ix_in, doc_id * 8);
    int64_t file_pos = InStream_Read_I64(ix_in);
    InStream_Seek(dat_in, file_pos);

    uint32_t num_fields = InStream_Read_C32(dat_in);
    while (num_fields--) {
        CharBuf *field
            = CB_Deserialize((CharBuf*)VTable_Make_Obj(CHARBUF), dat_in);
        ByteBuf *field_buf
            = BB_Deserialize((ByteBuf*)VTable_Make_Obj(BYTEBUF), dat_in);
        DocVec_Add_Field_Buf(doc_vec, field, field_buf);
        DECREF(field_buf);
        DECREF(field);
    }

    return doc_vec;
}
Beispiel #4
0
Obj*
I64SortCache_value(Int64SortCache *self, int32_t ord, Obj *blank) {
    if (ord == self->null_ord) {
        return NULL;
    }
    else if (ord < 0) {
        THROW(ERR, "Ordinal less than 0 for %o: %i32", self->field, ord);
    }
    else {
        Integer64 *int_blank = (Integer64*)CERTIFY(blank, INTEGER64);
        InStream_Seek(self->dat_in, ord * sizeof(int64_t));
        Int64_Set_Value(int_blank, InStream_Read_I64(self->dat_in));
    }
    return blank;
}
Beispiel #5
0
Obj*
F32SortCache_value(Float32SortCache *self, int32_t ord, Obj *blank) {
    if (ord == self->null_ord) {
        return NULL;
    }
    else if (ord < 0) {
        THROW(ERR, "Ordinal less than 0 for %o: %i32", self->field, ord);
    }
    else {
        Float32 *num_blank = (Float32*)CERTIFY(blank, FLOAT32);
        InStream_Seek(self->dat_in, ord * sizeof(float));
        Float32_Set_Value(num_blank, InStream_Read_F32(self->dat_in));
    }
    return blank;
}
Beispiel #6
0
static void
S_read_entry(LexIndex *self) {
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    InStream *ix_in  = ivars->ix_in;
    TermInfo *const tinfo = ivars->tinfo;
    int64_t offset = (int64_t)NumUtil_decode_bigend_u64(ivars->offsets + ivars->tick);
    InStream_Seek(ix_in, offset);
    TermStepper_Read_Key_Frame(ivars->term_stepper, ix_in);
    int32_t doc_freq = InStream_Read_C32(ix_in);
    TInfo_Set_Doc_Freq(tinfo, doc_freq);
    TInfo_Set_Post_FilePos(tinfo, InStream_Read_C64(ix_in));
    int64_t skip_filepos = doc_freq >= ivars->skip_interval
                           ? InStream_Read_C64(ix_in)
                           : 0;
    TInfo_Set_Skip_FilePos(tinfo, skip_filepos);
    TInfo_Set_Lex_FilePos(tinfo, InStream_Read_C64(ix_in));
}
Beispiel #7
0
InStream*
InStream_reopen(InStream *self, const CharBuf *filename, int64_t offset,
                int64_t len) {
    if (!self->file_handle) {
        THROW(ERR, "Can't Reopen() closed InStream %o", self->filename);
    }
    if (offset + len > FH_Length(self->file_handle)) {
        THROW(ERR, "Offset + length too large (%i64 + %i64 > %i64)",
              offset, len, FH_Length(self->file_handle));
    }

    InStream *twin = (InStream*)VTable_Make_Obj(self->vtable);
    InStream_do_open(twin, (Obj*)self->file_handle);
    if (filename != NULL) { CB_Mimic(twin->filename, (Obj*)filename); }
    twin->offset = offset;
    twin->len    = len;
    InStream_Seek(twin, 0);

    return twin;
}
Beispiel #8
0
InStream*
InStream_reopen(InStream *self, const CharBuf *filename, int64_t offset,
                int64_t len) {
    InStreamIVARS *const ivars = InStream_IVARS(self);
    if (!ivars->file_handle) {
        THROW(ERR, "Can't Reopen() closed InStream %o", ivars->filename);
    }
    if (offset + len > FH_Length(ivars->file_handle)) {
        THROW(ERR, "Offset + length too large (%i64 + %i64 > %i64)",
              offset, len, FH_Length(ivars->file_handle));
    }

    VTable *vtable = InStream_Get_VTable(self);
    InStream *other = (InStream*)VTable_Make_Obj(vtable);
    InStreamIVARS *const ovars = InStream_IVARS(other);
    InStream_do_open(other, (Obj*)ivars->file_handle);
    if (filename != NULL) { CB_Mimic(ovars->filename, (Obj*)filename); }
    ovars->offset = offset;
    ovars->len    = len;
    InStream_Seek(other, 0);

    return other;
}
Beispiel #9
0
HitDoc*
DefDocReader_Fetch_Doc_IMP(DefaultDocReader *self, int32_t doc_id) {
    DefaultDocReaderIVARS *const ivars = DefDocReader_IVARS(self);
    Schema   *const schema = ivars->schema;
    InStream *const dat_in = ivars->dat_in;
    InStream *const ix_in  = ivars->ix_in;
    Hash     *const fields = Hash_new(1);
    int64_t   start;
    uint32_t  num_fields;
    uint32_t  field_name_cap = 31;
    char     *field_name = (char*)MALLOCATE(field_name_cap + 1);

    // Get data file pointer from index, read number of fields.
    InStream_Seek(ix_in, (int64_t)doc_id * 8);
    start = InStream_Read_U64(ix_in);
    InStream_Seek(dat_in, start);
    num_fields = InStream_Read_C32(dat_in);

    // Decode stored data and build up the doc field by field.
    while (num_fields--) {
        uint32_t        field_name_len;
        Obj       *value;
        FieldType *type;

        // Read field name.
        field_name_len = InStream_Read_C32(dat_in);
        if (field_name_len > field_name_cap) {
            field_name_cap = field_name_len;
            field_name     = (char*)REALLOCATE(field_name,
                                                    field_name_cap + 1);
        }
        InStream_Read_Bytes(dat_in, field_name, field_name_len);

        // Find the Field's FieldType.
        StackString *field_name_str
            = SSTR_WRAP_UTF8(field_name, field_name_len);
        type = Schema_Fetch_Type(schema, (String*)field_name_str);

        // Read the field value.
        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                    uint32_t value_len = InStream_Read_C32(dat_in);
                    char *buf = (char*)MALLOCATE(value_len + 1);
                    InStream_Read_Bytes(dat_in, buf, value_len);
                    buf[value_len] = '\0'; 
                    value = (Obj*)Str_new_steal_utf8(buf, value_len);
                    break;
                }
            case FType_BLOB: {
                    uint32_t value_len = InStream_Read_C32(dat_in);
                    char *buf = (char*)MALLOCATE(value_len);
                    InStream_Read_Bytes(dat_in, buf, value_len);
                    value = (Obj*)BB_new_steal_bytes(
                                buf, value_len, value_len);
                    break;
                }
            case FType_FLOAT32:
                value = (Obj*)Float32_new(
                                InStream_Read_F32(dat_in));
                break;
            case FType_FLOAT64:
                value = (Obj*)Float64_new(
                                InStream_Read_F64(dat_in));
                break;
            case FType_INT32:
                value = (Obj*)Int32_new(
                                (int32_t)InStream_Read_C32(dat_in));
                break;
            case FType_INT64:
                value = (Obj*)Int64_new(
                                (int64_t)InStream_Read_C64(dat_in));
                break;
            default:
                value = NULL;
                THROW(ERR, "Unrecognized type: %o", type);
        }

        // Store the value.
        Hash_Store_Utf8(fields, field_name, field_name_len, value);
    }
    FREEMEM(field_name);

    HitDoc *retval = HitDoc_new(fields, doc_id, 0.0);
    DECREF(fields);
    return retval;
}
Beispiel #10
0
void
LexIndex_Seek_IMP(LexIndex *self, Obj *target) {
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    TermStepper *term_stepper = ivars->term_stepper;
    InStream    *ix_in        = ivars->ix_in;
    FieldType   *type         = ivars->field_type;
    int32_t      lo           = 0;
    int32_t      hi           = ivars->size - 1;
    int32_t      result       = -100;

    if (target == NULL || ivars->size == 0) {
        ivars->tick = 0;
        return;
    }
    else {
        if (!Obj_is_a(target, STRING)) {
            THROW(ERR, "Target is a %o, and not comparable to a %o",
                  Obj_get_class_name(target), Class_Get_Name(STRING));
        }
        /* TODO:
        Obj *first_obj = Vec_Fetch(terms, 0);
        if (!Obj_is_a(target, Obj_get_class(first_obj))) {
            THROW(ERR, "Target is a %o, and not comparable to a %o",
                Obj_get_class_name(target), Obj_get_class_name(first_obj));
        }
        */
    }

    // Divide and conquer.
    while (hi >= lo) {
        const int32_t mid = lo + ((hi - lo) / 2);
        const int64_t offset
            = (int64_t)NumUtil_decode_bigend_u64(ivars->offsets + mid);
        InStream_Seek(ix_in, offset);
        TermStepper_Read_Key_Frame(term_stepper, ix_in);

        // Compare values.  There is no need for a NULL-check because the term
        // number is alway between 0 and ivars->size - 1.
        Obj *value = TermStepper_Get_Value(term_stepper);
        int32_t comparison = FType_Compare_Values(type, target, value);

        if (comparison < 0) {
            hi = mid - 1;
        }
        else if (comparison > 0) {
            lo = mid + 1;
        }
        else {
            result = mid;
            break;
        }
    }

    // Record the index of the entry we've seeked to, then read entry.
    ivars->tick = hi == -1 // indicating that target lt first entry
                 ? 0
                 : result == -100 // if result is still -100, it wasn't set
                 ? hi
                 : result;
    S_read_entry(self);
}