Esempio n. 1
0
/* Exercise InStream buffering: Refill(), Fill(), a small Read_Bytes(), and
 * Seek() inside and past the first buffered window.  The backing RAM file
 * is written with 1023 'x' bytes followed by a 'y' and a 'z'. */
static void
test_refill(TestBatchRunner *runner) {
    RAMFile       *ram_file = RAMFile_new(NULL, false);
    OutStream     *out      = OutStream_open((Obj*)ram_file);
    InStream      *in;
    InStreamIVARS *in_ivars;
    char           scratch[5];
    int32_t        filler_left = 1023;

    // Populate the file: filler bytes first, then two marker bytes.
    while (filler_left-- > 0) {
        OutStream_Write_U8(out, 'x');
    }
    OutStream_Write_U8(out, 'y');
    OutStream_Write_U8(out, 'z');
    OutStream_Close(out);

    // Scenario 1: explicit Refill() on a freshly opened stream.
    in       = InStream_open((Obj*)ram_file);
    in_ivars = InStream_IVARS(in);
    InStream_Refill(in);
    TEST_INT_EQ(runner, in_ivars->limit - in_ivars->buf, IO_STREAM_BUF_SIZE,
                "Refill");
    TEST_INT_EQ(runner, (long)InStream_Tell(in), 0,
                "Correct file pos after standing-start Refill()");
    DECREF(in);

    // Scenario 2: Fill() with an explicit byte count.
    in       = InStream_open((Obj*)ram_file);
    in_ivars = InStream_IVARS(in);
    InStream_Fill(in, 30);
    TEST_INT_EQ(runner, in_ivars->limit - in_ivars->buf, 30, "Fill()");
    TEST_INT_EQ(runner, (long)InStream_Tell(in), 0,
                "Correct file pos after standing-start Fill()");
    DECREF(in);

    // Scenario 3: a 5-byte read on a fresh stream.
    in       = InStream_open((Obj*)ram_file);
    in_ivars = InStream_IVARS(in);
    InStream_Read_Bytes(in, scratch, 5);
    TEST_INT_EQ(runner, in_ivars->limit - in_ivars->buf,
                IO_STREAM_BUF_SIZE - 5, "small read triggers refill");
    DECREF(in);

    // Scenario 4: Seek() to offset 1023, then read the two marker bytes;
    // the window offset is checked before and after.
    in       = InStream_open((Obj*)ram_file);
    in_ivars = InStream_IVARS(in);
    TEST_INT_EQ(runner, InStream_Read_U8(in), 'x', "Read_U8");
    InStream_Seek(in, 1023);
    TEST_INT_EQ(runner, (long)FileWindow_IVARS(in_ivars->window)->offset, 0,
                "no unnecessary refill on Seek");
    TEST_INT_EQ(runner, (long)InStream_Tell(in), 1023, "Seek/Tell");
    TEST_INT_EQ(runner, InStream_Read_U8(in), 'y',
                "correct data after in-buffer Seek()");
    TEST_INT_EQ(runner, InStream_Read_U8(in), 'z', "automatic Refill");
    TEST_TRUE(runner, (FileWindow_IVARS(in_ivars->window)->offset != 0),
              "refilled");

    // Release everything this test created.
    DECREF(in);
    DECREF(out);
    DECREF(ram_file);
}
Esempio n. 2
0
/* Verify InStream_Clone() and InStream_Reopen() against a RAM file that
 * holds the 26 bytes 'a' + 0 .. 'a' + 25. */
static void
test_Clone_and_Reopen(TestBatchRunner *runner) {
    String        *orig_name = SSTR_WRAP_C("foo");
    String        *new_name  = SSTR_WRAP_C("bar");
    RAMFile       *ram_file  = RAMFile_new(NULL, false);
    OutStream     *out       = OutStream_open((Obj*)ram_file);
    RAMFileHandle *handle;
    InStream      *original;
    InStream      *twin;
    InStream      *sub_stream;
    uint8_t        offset = 0;

    // Write the 26 sequential bytes.
    while (offset < 26) {
        OutStream_Write_U8(out, 'a' + offset);
        offset++;
    }
    OutStream_Close(out);

    // Open a stream named "foo" and advance it one byte.
    handle   = RAMFH_open(orig_name, FH_READ_ONLY, ram_file);
    original = InStream_open((Obj*)handle);
    InStream_Seek(original, 1);
    TEST_TRUE(runner,
              Str_Equals(InStream_Get_Filename(original), (Obj*)orig_name),
              "Get_Filename");

    // The clone must agree with the original on name, length and position.
    twin = InStream_Clone(original);
    TEST_TRUE(runner,
              Str_Equals(InStream_Get_Filename(twin), (Obj*)orig_name),
              "Clones have same filename");
    TEST_TRUE(runner, InStream_Length(original) == InStream_Length(twin),
              "Clones have same length");
    TEST_TRUE(runner, InStream_Read_U8(original) == InStream_Read_U8(twin),
              "Clones start at same file position");

    // Reopen under the name "bar" with offset 25 and length 1.
    sub_stream = InStream_Reopen(original, new_name, 25, 1);
    TEST_TRUE(runner,
              Str_Equals(InStream_Get_Filename(sub_stream), (Obj*)new_name),
              "Reopened InStreams take new filename");
    TEST_TRUE(runner, InStream_Read_U8(sub_stream) == 'z',
              "Reopened stream starts at supplied offset");
    TEST_TRUE(runner, InStream_Length(sub_stream) == 1,
              "Reopened stream uses supplied length");
    TEST_TRUE(runner, InStream_Tell(sub_stream) == 1,
              "Tell() uses supplied offset for reopened stream");
    InStream_Seek(sub_stream, 0);
    TEST_TRUE(runner, InStream_Read_U8(sub_stream) == 'z',
              "Seek() uses supplied offset for reopened stream");

    // Release everything this test created.
    DECREF(sub_stream);
    DECREF(twin);
    DECREF(original);
    DECREF(out);
    DECREF(handle);
    DECREF(ram_file);
}
Esempio n. 3
0
/* Read one posting from `instream` into a RawPosting allocated from
 * `mem_pool`, copying the position data through without decoding it.
 *
 * The doc id is `last_doc_id` plus the upper bits of the leading C32; a set
 * low bit on that C32 means freq is 1, otherwise freq follows as another
 * C32.  Each of the `freq` positions is followed by a single boost byte.
 * `self` is not used.  Returns the new RawPosting. */
RawPosting*
RichPost_read_raw(RichPosting *self, InStream *instream, int32_t last_doc_id,
                  CharBuf *term_text, MemoryPool *mem_pool)
{
    char       *text_buf;
    size_t      text_size;
    uint32_t    doc_code;
    uint32_t    freq;
    uint32_t    i;
    int32_t     doc_id;
    size_t      raw_post_bytes;
    void       *allocation;
    RawPosting *raw_posting;
    char       *start;
    char       *dest;
    UNUSED_VAR(self);

    text_buf  = (char*)CB_Get_Ptr8(term_text);
    text_size = CB_Get_Size(term_text);

    // Decode the doc delta and the frequency.
    doc_code = InStream_Read_C32(instream);
    doc_id   = last_doc_id + (doc_code >> 1);
    if (doc_code & 1) {
        freq = 1;
    }
    else {
        freq = InStream_Read_C32(instream);
    }

    // Grab the maximum size this posting could need.
    raw_post_bytes = MAX_RAW_POSTING_LEN(text_size, freq);
    allocation     = MemPool_Grab(mem_pool, raw_post_bytes);
    raw_posting    = RawPost_new(allocation, doc_id, freq, text_buf,
                                 text_size);

    // Auxiliary data is written after the first text_size bytes of the blob.
    start = raw_posting->blob + text_size;
    dest  = start;

    // Copy each raw position followed by its boost byte.
    for (i = 0; i < freq; i++) {
        dest += InStream_Read_Raw_C64(instream, dest);
        *((uint8_t*)dest) = InStream_Read_U8(instream);
        dest++;
    }

    // Record what was actually written and resize the allocation to match.
    raw_posting->aux_len = dest - start;
    raw_post_bytes       = dest - (char*)raw_posting;
    MemPool_Resize(mem_pool, raw_posting, raw_post_bytes);

    return raw_posting;
}
Esempio n. 4
0
/* Rebuild a RangeQuery from `instream`.
 *
 * Read order: boost (F32), field name, a presence byte plus the thawed
 * lower term, a presence byte plus the thawed upper term, then the
 * include_lower and include_upper flag bytes.  The temporaries are released
 * once RangeQuery_init() has been called.  Returns `self`. */
RangeQuery*
RangeQuery_Deserialize_IMP(RangeQuery *self, InStream *instream) {
    float   boost      = InStream_Read_F32(instream);
    String *field      = Freezer_read_string(instream);
    Obj    *lower_term = NULL;
    Obj    *upper_term = NULL;

    // Each bound is optional and preceded by a presence byte.
    if (InStream_Read_U8(instream)) {
        lower_term = THAW(instream);
    }
    if (InStream_Read_U8(instream)) {
        upper_term = THAW(instream);
    }

    // Inclusivity flags, normalized to true/false.
    bool include_lower = InStream_Read_U8(instream) != 0;
    bool include_upper = InStream_Read_U8(instream) != 0;

    // Initialize the object, then apply the boost.
    RangeQuery_init(self, field, lower_term, upper_term, include_lower,
                    include_upper);
    RangeQuery_Set_Boost(self, boost);

    // Drop the local references.
    DECREF(upper_term);
    DECREF(lower_term);
    DECREF(field);
    return self;
}
Esempio n. 5
0
/* Populate a MatchDoc from `instream`.
 *
 * If `self` is NULL a new object is created with VTable_Make_Obj().
 * Read order: doc_id (C32), score (float), a presence byte, and -- only
 * when that byte is non-zero -- the serialized values array.
 *
 * When the presence byte is zero, `values` is set to NULL explicitly so the
 * result does not depend on how the object's memory was initialized.
 * NOTE(review): like the non-zero branch, this overwrites any previous
 * `values` pointer without releasing it; the function appears to expect a
 * blank object -- confirm against callers.
 *
 * Returns the populated object. */
MatchDoc*
MatchDoc_deserialize(MatchDoc *self, InStream *instream)
{
    self = self ? self : (MatchDoc*)VTable_Make_Obj(&MATCHDOC);
    self->doc_id = InStream_Read_C32(instream);
    self->score  = InStream_Read_Float(instream);
    if (InStream_Read_U8(instream)) {
        self->values = VA_deserialize(NULL, instream);
    }
    else {
        self->values = NULL;
    }
    return self;
}
Esempio n. 6
0
/* Rebuild a RangeQuery from `instream`.
 *
 * Read order: boost (F32), field name (CharBuf), a presence byte plus the
 * thawed lower term, a presence byte plus the thawed upper term, then the
 * include_lower and include_upper flag bytes.  The temporaries are released
 * once RangeQuery_init() has been called.  Returns `self`. */
RangeQuery*
RangeQuery_deserialize(RangeQuery *self, InStream *instream) {
    float    boost;
    CharBuf *field;
    Obj     *lower_term = NULL;
    Obj     *upper_term = NULL;
    bool_t   include_lower;
    bool_t   include_upper;

    // Pull the components off the stream in serialized order.
    boost = InStream_Read_F32(instream);
    field = CB_Deserialize((CharBuf*)VTable_Make_Obj(CHARBUF), instream);
    if (InStream_Read_U8(instream)) {
        lower_term = THAW(instream);
    }
    if (InStream_Read_U8(instream)) {
        upper_term = THAW(instream);
    }
    include_lower = InStream_Read_U8(instream);
    include_upper = InStream_Read_U8(instream);

    // Initialize the object, then apply the boost.
    RangeQuery_init(self, field, lower_term, upper_term, include_lower,
                    include_upper);
    RangeQuery_Set_Boost(self, boost);

    // Drop the local references.
    DECREF(upper_term);
    DECREF(lower_term);
    DECREF(field);
    return self;
}
Esempio n. 7
0
/* Populate a LeafQuery's instance variables from `instream`.
 *
 * Read order: a presence byte, the field name (only if that byte is
 * non-zero; otherwise the field is set to NULL), the text, and finally the
 * boost as an F32.  Returns `self`. */
LeafQuery*
LeafQuery_Deserialize_IMP(LeafQuery *self, InStream *instream) {
    LeafQueryIVARS *const ivars = LeafQuery_IVARS(self);

    // The field is optional; the leading byte says whether one follows.
    ivars->field = InStream_Read_U8(instream)
                   ? Freezer_read_string(instream)
                   : NULL;
    ivars->text  = Freezer_read_string(instream);
    ivars->boost = InStream_Read_F32(instream);

    return self;
}
Esempio n. 8
0
/* Decode the next posting record from `instream` into `self`.
 *
 * Updates self->doc_id (by adding the decoded delta), self->freq, the
 * self->prox positions array, the parallel self->prox_boosts array, and
 * self->weight, which is the mean of the per-position boosts.
 *
 * The position buffers grow when freq exceeds self->prox_cap, and the new
 * capacity is recorded.  Previously prox_cap was never updated, so the
 * check kept comparing against a stale value and both buffers were
 * reallocated to the exact size on every record that exceeded it. */
void
RichPost_read_record(RichPosting *self, InStream *instream)
{
    float *const norm_decoder = self->norm_decoder;
    uint32_t  doc_code;
    uint32_t  num_prox = 0;
    uint32_t  position = 0; 
    uint32_t *positions;
    float    *prox_boosts;
    float     aggregate_weight = 0.0;

    // Decode delta doc. 
    doc_code = InStream_Read_C32(instream);
    self->doc_id   += doc_code >> 1;

    // If the stored num was odd, the freq is 1.  
    if (doc_code & 1) {
        self->freq = 1;
    }
    // Otherwise, freq was stored as a C32. 
    else {
        self->freq = InStream_Read_C32(instream);
    } 

    // Grow both per-position buffers together and remember the new size.
    num_prox = self->freq;
    if (num_prox > self->prox_cap) {
        self->prox 
            = (uint32_t*)REALLOCATE(self->prox, num_prox * sizeof(uint32_t));
        self->prox_boosts 
            = (float*)REALLOCATE(self->prox_boosts, num_prox * sizeof(float));
        self->prox_cap = num_prox;
    }
    positions   = self->prox;
    prox_boosts = self->prox_boosts;

    // Read positions, aggregate per-position boost byte into weight. 
    // Positions are stored as deltas, so keep a running sum.
    while (num_prox--) {
        position += InStream_Read_C32(instream);
        *positions++ = position;
        *prox_boosts = norm_decoder[ InStream_Read_U8(instream) ];
        aggregate_weight += *prox_boosts;
        prox_boosts++;
    }

    // NOTE(review): a stored freq of 0 would make this 0/0 -- confirm the
    // index format never writes one.
    self->weight = aggregate_weight / self->freq;
}
Esempio n. 9
0
/* Read one posting from `instream` into a RawPosting allocated from
 * `mem_pool`, copying the position data through without decoding it.
 *
 * The doc id is `last_doc_id` plus the upper bits of the leading C32; a set
 * low bit on that C32 means freq is 1, otherwise freq follows as another
 * C32.  The auxiliary data is one field-boost byte followed by `freq` raw
 * positions.  `self` is not used.  Returns the new RawPosting. */
RawPosting*
ScorePost_Read_Raw_IMP(ScorePosting *self, InStream *instream,
                       int32_t last_doc_id, String *term_text,
                       MemoryPool *mem_pool) {
    UNUSED_VAR(self);

    const char *const text_buf  = Str_Get_Ptr8(term_text);
    const size_t      text_size = Str_Get_Size(term_text);

    // Decode the doc delta and the frequency.
    const uint32_t doc_code = InStream_Read_C32(instream);
    const int32_t  doc_id   = last_doc_id + (doc_code >> 1);
    uint32_t       freq     = 1;
    if (!(doc_code & 1)) {
        freq = InStream_Read_C32(instream);
    }

    // Grab the maximum size this posting could need.
    const size_t base_size = Class_Get_Obj_Alloc_Size(RAWPOSTING);
    size_t raw_post_bytes  = MAX_RAW_POSTING_LEN(base_size, text_size, freq);
    void *const allocation = MemPool_Grab(mem_pool, raw_post_bytes);
    RawPosting *const raw_posting
        = RawPost_new(allocation, doc_id, freq, text_buf, text_size);
    RawPostingIVARS *const raw_post_ivars = RawPost_IVARS(raw_posting);

    // Auxiliary data is written after the first text_size bytes of the blob.
    char *const start = raw_post_ivars->blob + text_size;
    char *dest        = start;

    // Field boost byte comes first.
    *((uint8_t*)dest) = InStream_Read_U8(instream);
    dest++;

    // Then one raw position per occurrence.
    for (uint32_t i = 0; i < freq; i++) {
        dest += InStream_Read_Raw_C64(instream, dest);
    }

    // Record what was actually written and resize the allocation to match.
    raw_post_ivars->aux_len = dest - start;
    raw_post_bytes          = dest - (char*)raw_posting;
    MemPool_Resize(mem_pool, raw_posting, raw_post_bytes);

    return raw_posting;
}
Esempio n. 10
0
/* Fill in `obj` from `instream`, dispatching on the object's class.
 *
 * The classes are tested in a fixed order and the first match wins.  The
 * return value is whatever the class-specific routine returns; for
 * Booleans it is a new reference to CFISH_TRUE or CFISH_FALSE and `obj` is
 * DECREF'd.  An object of any other class results in a THROW. */
Obj*
Freezer_deserialize(Obj *obj, InStream *instream) {
    // Core value types.
    if (Obj_is_a(obj, STRING)) {
        return (Obj*)Freezer_deserialize_string((String*)obj, instream);
    }
    if (Obj_is_a(obj, BLOB)) {
        return (Obj*)Freezer_deserialize_blob((Blob*)obj, instream);
    }
    if (Obj_is_a(obj, VECTOR)) {
        return (Obj*)Freezer_deserialize_varray((Vector*)obj, instream);
    }
    if (Obj_is_a(obj, HASH)) {
        return (Obj*)Freezer_deserialize_hash((Hash*)obj, instream);
    }
    if (Obj_is_a(obj, INTEGER)) {
        int64_t int_val = (int64_t)InStream_Read_C64(instream);
        return (Obj*)Int_init((Integer*)obj, int_val);
    }
    if (Obj_is_a(obj, FLOAT)) {
        double float_val = InStream_Read_F64(instream);
        return (Obj*)Float_init((Float*)obj, float_val);
    }
    if (Obj_is_a(obj, BOOLEAN)) {
        Obj *singleton;
        if (InStream_Read_U8(instream)) {
            singleton = INCREF(CFISH_TRUE);
        }
        else {
            singleton = INCREF(CFISH_FALSE);
        }
        // FIXME: This DECREF is essentially a no-op causing a
        // memory leak.
        DECREF(obj);
        return singleton;
    }

    // Classes that supply their own Deserialize routine.
    if (Obj_is_a(obj, QUERY)) {
        return (Obj*)Query_Deserialize((Query*)obj, instream);
    }
    if (Obj_is_a(obj, DOC)) {
        return (Obj*)Doc_Deserialize((Doc*)obj, instream);
    }
    if (Obj_is_a(obj, DOCVECTOR)) {
        return (Obj*)DocVec_Deserialize((DocVector*)obj, instream);
    }
    if (Obj_is_a(obj, TERMVECTOR)) {
        return (Obj*)TV_Deserialize((TermVector*)obj, instream);
    }
    if (Obj_is_a(obj, SIMILARITY)) {
        return (Obj*)Sim_Deserialize((Similarity*)obj, instream);
    }
    if (Obj_is_a(obj, MATCHDOC)) {
        return (Obj*)MatchDoc_Deserialize((MatchDoc*)obj, instream);
    }
    if (Obj_is_a(obj, TOPDOCS)) {
        return (Obj*)TopDocs_Deserialize((TopDocs*)obj, instream);
    }
    if (Obj_is_a(obj, SORTSPEC)) {
        return (Obj*)SortSpec_Deserialize((SortSpec*)obj, instream);
    }
    if (Obj_is_a(obj, SORTRULE)) {
        return (Obj*)SortRule_Deserialize((SortRule*)obj, instream);
    }

    // No class matched.
    THROW(ERR, "Don't know how to deserialize a %o",
          Obj_get_class_name(obj));

    return obj;
}