Exemple #1
0
static void
test_Seek_and_Tell(TestBatchRunner *runner) {
    int64_t     gb1      = INT64_C(0x40000000);
    int64_t     gb3      = gb1 * 3;
    int64_t     gb6      = gb1 * 6;
    int64_t     gb12     = gb1 * 12;
    FileHandle *fh       = (FileHandle*)MockFileHandle_new(NULL, gb12);
    InStream   *instream = InStream_open((Obj*)fh);
    InStreamIVARS *const ivars = InStream_IVARS(instream);

    InStream_Buf(instream, 10000);
    TEST_TRUE(runner, ivars->limit == ((char*)NULL) + 10000,
              "InStream_Buf sets limit");

    InStream_Seek(instream, gb6);
    TEST_TRUE(runner, InStream_Tell(instream) == gb6,
              "Tell after seek forwards outside buffer");
    TEST_TRUE(runner, ivars->buf == NULL,
              "Seek forwards outside buffer sets buf to NULL");
    TEST_TRUE(runner, ivars->limit == NULL,
              "Seek forwards outside buffer sets limit to NULL");
    TEST_TRUE(runner, FileWindow_IVARS(ivars->window)->offset == gb6,
              "Seek forwards outside buffer tracks pos in window offset");

    InStream_Buf(instream, (size_t)gb1);
    TEST_TRUE(runner, ivars->limit == ((char*)NULL) + gb1,
              "InStream_Buf sets limit");

    InStream_Seek(instream, gb6 + 10);
    TEST_TRUE(runner, InStream_Tell(instream) == gb6 + 10,
              "Tell after seek forwards within buffer");
    TEST_TRUE(runner, ivars->buf == ((char*)NULL) + 10,
              "Seek within buffer sets buf");
    TEST_TRUE(runner, ivars->limit == ((char*)NULL) + gb1,
              "Seek within buffer leaves limit alone");

    InStream_Seek(instream, gb6 + 1);
    TEST_TRUE(runner, InStream_Tell(instream) == gb6 + 1,
              "Tell after seek backwards within buffer");
    TEST_TRUE(runner, ivars->buf == ((char*)NULL) + 1,
              "Seek backwards within buffer sets buf");
    TEST_TRUE(runner, ivars->limit == ((char*)NULL) + gb1,
              "Seek backwards within buffer leaves limit alone");

    InStream_Seek(instream, gb3);
    TEST_TRUE(runner, InStream_Tell(instream) == gb3,
              "Tell after seek backwards outside buffer");
    TEST_TRUE(runner, ivars->buf == NULL,
              "Seek backwards outside buffer sets buf to NULL");
    TEST_TRUE(runner, ivars->limit == NULL,
              "Seek backwards outside buffer sets limit to NULL");
    TEST_TRUE(runner, FileWindow_IVARS(ivars->window)->offset == gb3,
              "Seek backwards outside buffer tracks pos in window offset");

    DECREF(instream);
    DECREF(fh);
}
LexIndex*
LexIndex_init(LexIndex *self, Schema *schema, Folder *folder, 
              Segment *segment, const CharBuf *field)
{
    i32_t    field_num = Seg_Field_Num(segment, field);
    CharBuf *seg_name  = Seg_Get_Name(segment);
    CharBuf *ixix_file = CB_newf("%o/lexicon-%i32.ixix", seg_name, field_num);
    CharBuf *ix_file   = CB_newf("%o/lexicon-%i32.ix", seg_name, field_num);
    Architecture *arch = Schema_Get_Architecture(schema);

    /* Init. */
    self->term  = ViewCB_new_from_trusted_utf8(NULL, 0);
    self->tinfo = TInfo_new(0,0,0,0);
    self->tick  = 0;

    /* Derive */
    self->field_type = Schema_Fetch_Type(schema, field);
    if (!self->field_type) {
        CharBuf *mess = MAKE_MESS("Unknown field: '%o'", field);
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        Err_throw_mess(mess);
    }
    INCREF(self->field_type);
    self->ixix_in = Folder_Open_In(folder, ixix_file);
    self->ix_in   = Folder_Open_In(folder, ix_file);
    if (!self->ixix_in || !self->ix_in) {
        CharBuf *mess =
             MAKE_MESS("Can't open either %o or %o", ix_file, ixix_file);
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        Err_throw_mess(mess);
    }
    self->index_interval = Arch_Index_Interval(arch);
    self->skip_interval  = Arch_Skip_Interval(arch);
    self->size    = (i32_t)(InStream_Length(self->ixix_in) / sizeof(i64_t));
    self->offsets = (i64_t*)InStream_Buf(self->ixix_in,
        (size_t)InStream_Length(self->ixix_in));
    self->data = InStream_Buf(self->ix_in, InStream_Length(self->ix_in));
    self->limit = self->data + InStream_Length(self->ix_in);

    DECREF(ixix_file);
    DECREF(ix_file);

    return self;
}
Exemple #3
0
NumericSortCache*
NumSortCache_init(NumericSortCache *self, const CharBuf *field,
                  FieldType *type, int32_t cardinality, int32_t doc_max,
                  int32_t null_ord, int32_t ord_width, InStream *ord_in,
                  InStream *dat_in) {
    // Validate.
    if (!type || !FType_Sortable(type) || !FType_Is_A(type, NUMERICTYPE)) {
        DECREF(self);
        THROW(ERR, "'%o' isn't a sortable NumericType field", field);
    }

    // Mmap ords and super-init.
    int64_t  ord_len = InStream_Length(ord_in);
    void    *ords    = InStream_Buf(ord_in, (size_t)ord_len);
    SortCache_init((SortCache*)self, field, type, ords, cardinality, doc_max,
                   null_ord, ord_width);

    // Assign.
    self->ord_in = (InStream*)INCREF(ord_in);
    self->dat_in = (InStream*)INCREF(dat_in);

    // Validate ord file length.
    double BITS_PER_BYTE = 8.0;
    double docs_per_byte = BITS_PER_BYTE / self->ord_width;
    double max_ords      = ord_len * docs_per_byte;
    if (max_ords < self->doc_max + 1) {
        DECREF(self);
        THROW(ERR, "Conflict between ord count max %f64 and doc_max %i32 for "
              "field %o", max_ords, self->doc_max, field);
    }

    ABSTRACT_CLASS_CHECK(self, NUMERICSORTCACHE);
    return self;
}
Exemple #4
0
TextSortCache*
TextSortCache_init(TextSortCache *self, String *field,
                   FieldType *type, int32_t cardinality,
                   int32_t doc_max, int32_t null_ord, int32_t ord_width,
                   InStream *ord_in, InStream *ix_in, InStream *dat_in) {
    // Validate.
    if (!type || !FType_Sortable(type)) {
        DECREF(self);
        THROW(ERR, "'%o' isn't a sortable field", field);
    }

    // Memory map ords and super-init.
    int64_t ord_len = InStream_Length(ord_in);
    const void *ords = InStream_Buf(ord_in, (size_t)ord_len);
    SortCache_init((SortCache*)self, field, type, ords, cardinality, doc_max,
                   null_ord, ord_width);
    TextSortCacheIVARS *const ivars = TextSortCache_IVARS(self);

    // Validate ords file length.
    double  bytes_per_doc = ivars->ord_width / 8.0;
    double  max_ords      = ord_len / bytes_per_doc;
    if (max_ords < ivars->doc_max + 1) {
        WARN("ORD WIDTH: %i32 %i32", ord_width, ivars->ord_width);
        THROW(ERR, "Conflict between ord count max %f64 and doc_max %i32 for "
              "field %o", max_ords, doc_max, field);
    }

    // Assign.
    ivars->ord_in = (InStream*)INCREF(ord_in);
    ivars->ix_in  = (InStream*)INCREF(ix_in);
    ivars->dat_in = (InStream*)INCREF(dat_in);

    return self;
}
Exemple #5
0
LexIndex*
LexIndex_init(LexIndex *self, Schema *schema, Folder *folder,
              Segment *segment, String *field) {
    int32_t  field_num = Seg_Field_Num(segment, field);
    String  *seg_name  = Seg_Get_Name(segment);
    String  *ixix_file = Str_newf("%o/lexicon-%i32.ixix", seg_name, field_num);
    String  *ix_file   = Str_newf("%o/lexicon-%i32.ix", seg_name, field_num);
    Architecture *arch = Schema_Get_Architecture(schema);

    // Init.
    Lex_init((Lexicon*)self, field);
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    ivars->tinfo        = TInfo_new(0);
    ivars->tick         = 0;

    // Derive
    ivars->field_type = Schema_Fetch_Type(schema, field);
    if (!ivars->field_type) {
        String *mess = MAKE_MESS("Unknown field: '%o'", field);
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        Err_throw_mess(ERR, mess);
    }
    ivars->field_type = (FieldType*)INCREF(ivars->field_type);
    ivars->term_stepper = FType_Make_Term_Stepper(ivars->field_type);
    ivars->ixix_in = Folder_Open_In(folder, ixix_file);
    if (!ivars->ixix_in) {
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->ix_in = Folder_Open_In(folder, ix_file);
    if (!ivars->ix_in) {
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->index_interval = Arch_Index_Interval(arch);
    ivars->skip_interval  = Arch_Skip_Interval(arch);
    ivars->size    = (int32_t)(InStream_Length(ivars->ixix_in) / sizeof(int64_t));
    ivars->offsets = (const int64_t*)InStream_Buf(ivars->ixix_in,
            (size_t)InStream_Length(ivars->ixix_in));

    DECREF(ixix_file);
    DECREF(ix_file);

    return self;
}
Exemple #6
0
void
ScorePost_Read_Record_IMP(ScorePosting *self, InStream *instream) {
    ScorePostingIVARS *const ivars = ScorePost_IVARS(self);
    uint32_t  position = 0;
    const size_t max_start_bytes = (C32_MAX_BYTES * 2) + 1;
    const char *buf = InStream_Buf(instream, max_start_bytes);
    const uint32_t doc_code = NumUtil_decode_c32(&buf);
    const uint32_t doc_delta = doc_code >> 1;

    // Apply delta doc and retrieve freq.
    ivars->doc_id   += doc_delta;
    if (doc_code & 1) {
        ivars->freq = 1;
    }
    else {
        ivars->freq = NumUtil_decode_c32(&buf);
    }

    // Decode boost/norm byte.
    ivars->weight = ivars->norm_decoder[*(uint8_t*)buf];
    buf++;

    // Read positions.
    uint32_t num_prox = ivars->freq;
    if (num_prox > ivars->prox_cap) {
        ivars->prox = (uint32_t*)REALLOCATE(
                         ivars->prox, num_prox * sizeof(uint32_t));
        ivars->prox_cap = num_prox;
    }
    uint32_t *positions = ivars->prox;

    InStream_Advance_Buf(instream, buf);
    buf = InStream_Buf(instream, num_prox * C32_MAX_BYTES);
    while (num_prox--) {
        position += NumUtil_decode_c32(&buf);
        *positions++ = position;
    }

    InStream_Advance_Buf(instream, buf);
}
Exemple #7
0
static void
test_Buf(TestBatchRunner *runner) {
    RAMFile    *file      = RAMFile_new(NULL, false);
    OutStream  *outstream = OutStream_open((Obj*)file);
    size_t      size      = IO_STREAM_BUF_SIZE * 2 + 5;
    InStream   *instream;
    char       *buf;

    for (uint32_t i = 0; i < size; i++) {
        OutStream_Write_U8(outstream, 'a');
    }
    OutStream_Close(outstream);

    instream = InStream_open((Obj*)file);
    InStreamIVARS *const ivars = InStream_IVARS(instream);
    buf = InStream_Buf(instream, 5);
    TEST_INT_EQ(runner, ivars->limit - buf, IO_STREAM_BUF_SIZE,
                "Small request bumped up");

    buf += IO_STREAM_BUF_SIZE - 10; // 10 bytes left in buffer.
    InStream_Advance_Buf(instream, buf);

    buf = InStream_Buf(instream, 10);
    TEST_INT_EQ(runner, ivars->limit - buf, 10,
                "Exact request doesn't trigger refill");

    buf = InStream_Buf(instream, 11);
    TEST_INT_EQ(runner, ivars->limit - buf, IO_STREAM_BUF_SIZE,
                "Requesting over limit triggers refill");

    int64_t  expected = InStream_Length(instream) - InStream_Tell(instream);
    char    *buff     = InStream_Buf(instream, 100000);
    int64_t  got      = PTR_TO_I64(ivars->limit) - PTR_TO_I64(buff);
    TEST_TRUE(runner, got == expected,
              "Requests greater than file size get pared down");

    DECREF(instream);
    DECREF(outstream);
    DECREF(file);
}
Exemple #8
0
Obj*
Json_slurp_json(Folder *folder, const CharBuf *path) {
    InStream *instream = Folder_Open_In(folder, path);
    if (!instream) {
        ERR_ADD_FRAME(Err_get_error());
        return NULL;
    }
    size_t len = (size_t)InStream_Length(instream);
    char *buf = InStream_Buf(instream, len);
    Obj *dump = S_parse_json(buf, len);
    InStream_Close(instream);
    DECREF(instream);
    if (!dump) {
        ERR_ADD_FRAME(Err_get_error());
    }
    return dump;
}
SortCache*
SortCache_init(SortCache *self, Schema *schema, Folder *folder,
               Segment *segment, i32_t field_num)
{
    CharBuf *field    = Seg_Field_Name(segment, field_num);
    CharBuf *seg_name = Seg_Get_Name(segment);
    CharBuf *ord_file = CB_newf("%o/sort-%i32.ord", seg_name, field_num);
    CharBuf *ix_file  = CB_newf("%o/sort-%i32.ix",  seg_name, field_num);
    CharBuf *dat_file = CB_newf("%o/sort-%i32.dat", seg_name, field_num);
    i64_t ord_len, ix_len, dat_len;

    /* Derive. */
    self->doc_max = Seg_Get_Count(segment);
    self->type    = Schema_Fetch_Type(schema, field);
    if (!self->type || !FType_Sortable(self->type)) {
        THROW("'%o' isn't a sortable field", field);
    }

    /* Open instreams. */
    self->ord_in  = Folder_Open_In(folder, ord_file);
    self->ix_in   = Folder_Open_In(folder, ix_file);
    self->dat_in  = Folder_Open_In(folder, dat_file);
    if (!self->ix_in || !self->dat_in || !self->ord_in) {
        CharBuf *mess = MAKE_MESS("Can't open either %o, %o or %o", ord_file, 
            ix_file, dat_file);
        DECREF(ord_file);
        DECREF(ix_file);
        DECREF(dat_file);
        Err_throw_mess(mess);
    }
    ord_len = InStream_Length(self->ord_in);
    ix_len  = InStream_Length(self->ix_in);
    dat_len = InStream_Length(self->dat_in);

    /* Calculate the number of unique values and derive the ord bit width. */
    self->num_uniq = (i32_t)(ix_len / 8) - 1; 
    self->width    = S_calc_width(self->num_uniq);

    /* Validate file lengths. */
    {
        double bytes_per_doc = self->width / 8.0;
        double max_ords      = ord_len / bytes_per_doc;
        if (max_ords < self->doc_max + 1) {
            THROW("Conflict between ord count max %f64 and doc_max %i32", 
                max_ords, self->doc_max);
        }
    }

    /* Mmap ords, offsets and character data. */
    self->ords      = InStream_Buf(self->ord_in, (size_t)ord_len);
    self->offsets   = (i64_t*)InStream_Buf(self->ix_in, (size_t)ix_len);
    self->char_data = InStream_Buf(self->dat_in, dat_len);
    {
        char *offs            = (char*)self->offsets;
        self->offsets_limit   = (i64_t*)(offs + ix_len);
        self->char_data_limit = self->char_data + dat_len;
    }

    DECREF(ord_file);
    DECREF(ix_file);
    DECREF(dat_file);

    return self;
}