コード例 #1
0
ファイル: Freezer.c プロジェクト: rectang/lucy
Blob*
Freezer_deserialize_blob(Blob *blob, InStream *instream) {
    size_t size = InStream_Read_C32(instream);
    char   *buf = (char*)MALLOCATE(size);
    InStream_Read_Bytes(instream, buf, size);
    return Blob_init_steal(blob, buf, size);
}
コード例 #2
0
ファイル: TextSortCache.c プロジェクト: carriercomm/lucy
Obj*
TextSortCache_Value_IMP(TextSortCache *self, int32_t ord) {
    TextSortCacheIVARS *const ivars = TextSortCache_IVARS(self);
    if (ord == ivars->null_ord) {
        return NULL;
    }
    InStream_Seek(ivars->ix_in, ord * sizeof(int64_t));
    int64_t offset = InStream_Read_I64(ivars->ix_in);
    if (offset == NULL_SENTINEL) {
        return NULL;
    }
    else {
        uint32_t next_ord = ord + 1;
        int64_t next_offset;
        while (1) {
            InStream_Seek(ivars->ix_in, next_ord * sizeof(int64_t));
            next_offset = InStream_Read_I64(ivars->ix_in);
            if (next_offset != NULL_SENTINEL) { break; }
            next_ord++;
        }

        // Read character data into String.
        size_t len = (size_t)(next_offset - offset);
        char *ptr = (char*)MALLOCATE(len + 1);
        InStream_Seek(ivars->dat_in, offset);
        InStream_Read_Bytes(ivars->dat_in, ptr, len);
        ptr[len] = '\0';
        return (Obj*)Str_new_steal_utf8(ptr, len);
    }
}
コード例 #3
0
ファイル: Folder.c プロジェクト: theory/lucy
ByteBuf*
Folder_slurp_file(Folder *self, const CharBuf *path) {
    InStream *instream = Folder_Open_In(self, path);
    ByteBuf  *retval   = NULL;

    if (!instream) {
        RETHROW(INCREF(Err_get_error()));
    }
    else {
        uint64_t length = InStream_Length(instream);

        if (length >= SIZE_MAX) {
            InStream_Close(instream);
            DECREF(instream);
            THROW(ERR, "File %o is too big to slurp (%u64 bytes)", path,
                  length);
        }
        else {
            size_t size = (size_t)length;
            char *ptr = (char*)MALLOCATE((size_t)size + 1);
            InStream_Read_Bytes(instream, ptr, size);
            ptr[size] = '\0';
            retval = BB_new_steal_bytes(ptr, size, size + 1);
            InStream_Close(instream);
            DECREF(instream);
        }
    }

    return retval;
}
コード例 #4
0
ファイル: Hash.c プロジェクト: pavansondur/lucy
Hash*
Hash_deserialize(Hash *self, InStream *instream) {
    uint32_t size         = InStream_Read_C32(instream);
    uint32_t num_charbufs = InStream_Read_C32(instream);
    uint32_t num_other    = size - num_charbufs;
    CharBuf *key          = num_charbufs ? CB_new(0) : NULL;

    Hash_init(self, size);

    // Read key-value pairs with CharBuf keys.
    while (num_charbufs--) {
        uint32_t len = InStream_Read_C32(instream);
        char *key_buf = CB_Grow(key, len);
        InStream_Read_Bytes(instream, key_buf, len);
        key_buf[len] = '\0';
        CB_Set_Size(key, len);
        Hash_Store(self, (Obj*)key, THAW(instream));
    }
    DECREF(key);

    // Read remaining key/value pairs.
    while (num_other--) {
        Obj *k = THAW(instream);
        Hash_Store(self, k, THAW(instream));
        DECREF(k);
    }

    return self;
}
コード例 #5
0
ファイル: TextSortCache.c プロジェクト: gitpan/KinoSearch
Obj*
TextSortCache_value(TextSortCache *self, int32_t ord, Obj *blank)
{
    if (ord == self->null_ord) {
        return NULL;
    }
    InStream_Seek(self->ix_in, ord * sizeof(int64_t));
    int64_t offset = InStream_Read_I64(self->ix_in);
    if (offset == NULL_SENTINEL) { 
        return NULL; 
    }
    else {
        uint32_t next_ord = ord + 1;
        int64_t next_offset;
        while (1) {
            InStream_Seek(self->ix_in, next_ord * sizeof(int64_t));
            next_offset = InStream_Read_I64(self->ix_in);
            if (next_offset != NULL_SENTINEL) { break; }
            next_ord++;
        }

        // Read character data into CharBuf. 
        CERTIFY(blank, CHARBUF);
        int64_t len = next_offset - offset;
        char *ptr = CB_Grow((CharBuf*)blank, (size_t)len);
        InStream_Seek(self->dat_in, offset);
        InStream_Read_Bytes(self->dat_in, ptr, (size_t)len);
        ptr[len] = '\0';
        CB_Set_Size((CharBuf*)blank, (size_t)len);
    }
    return blank;
}
コード例 #6
0
ファイル: TextType.c プロジェクト: gitpan/KinoSearch
void
TextTermStepper_read_key_frame(TextTermStepper *self, InStream *instream)
{ 
    const uint32_t text_len = InStream_Read_C32(instream);
    CharBuf *value;
    char *ptr;

    // Allocate space. 
    if (self->value == NULL) {
        self->value = (Obj*)CB_new(text_len);
    }
    value = (CharBuf*)self->value;
    ptr   = CB_Grow(value, text_len);

    // Set the value text. 
    InStream_Read_Bytes(instream, ptr, text_len);
    CB_Set_Size(value, text_len);
    if (!StrHelp_utf8_valid(ptr, text_len)) {
        THROW(ERR, "Invalid UTF-8 sequence in '%o' at byte %i64",
            InStream_Get_Filename(instream), 
            InStream_Tell(instream) - text_len);
    }

    // Null-terminate. 
    ptr[text_len] = '\0';
}
コード例 #7
0
ファイル: TextType.c プロジェクト: kidaa/lucy
void
TextTermStepper_Read_Delta_IMP(TextTermStepper *self, InStream *instream) {
    TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
    const uint32_t text_overlap     = InStream_Read_C32(instream);
    const uint32_t finish_chars_len = InStream_Read_C32(instream);
    const uint32_t total_text_len   = text_overlap + finish_chars_len;

    // Allocate space.
    CharBuf *charbuf = (CharBuf*)ivars->value;
    char    *ptr     = CB_Grow(charbuf, total_text_len);

    // Set the value text.
    InStream_Read_Bytes(instream, ptr + text_overlap, finish_chars_len);
    CB_Set_Size(charbuf, total_text_len);
    if (!StrHelp_utf8_valid(ptr, total_text_len)) {
        THROW(ERR, "Invalid UTF-8 sequence in '%o' at byte %i64",
              InStream_Get_Filename(instream),
              InStream_Tell(instream) - finish_chars_len);
    }

    // Null-terminate.
    ptr[total_text_len] = '\0';

    // Invalidate string.
    DECREF(ivars->string);
    ivars->string = NULL;
}
コード例 #8
0
ファイル: TestInStream.c プロジェクト: rbevers/lucy
static void
test_refill(TestBatchRunner *runner) {
    RAMFile    *file      = RAMFile_new(NULL, false);
    OutStream  *outstream = OutStream_open((Obj*)file);
    InStream   *instream;
    char        scratch[5];
    InStreamIVARS *ivars;

    for (int32_t i = 0; i < 1023; i++) {
        OutStream_Write_U8(outstream, 'x');
    }
    OutStream_Write_U8(outstream, 'y');
    OutStream_Write_U8(outstream, 'z');
    OutStream_Close(outstream);

    instream = InStream_open((Obj*)file);
    ivars = InStream_IVARS(instream);
    InStream_Refill(instream);
    TEST_INT_EQ(runner, ivars->limit - ivars->buf, IO_STREAM_BUF_SIZE,
                "Refill");
    TEST_INT_EQ(runner, (long)InStream_Tell(instream), 0,
                "Correct file pos after standing-start Refill()");
    DECREF(instream);

    instream = InStream_open((Obj*)file);
    ivars = InStream_IVARS(instream);
    InStream_Fill(instream, 30);
    TEST_INT_EQ(runner, ivars->limit - ivars->buf, 30, "Fill()");
    TEST_INT_EQ(runner, (long)InStream_Tell(instream), 0,
                "Correct file pos after standing-start Fill()");
    DECREF(instream);

    instream = InStream_open((Obj*)file);
    ivars = InStream_IVARS(instream);
    InStream_Read_Bytes(instream, scratch, 5);
    TEST_INT_EQ(runner, ivars->limit - ivars->buf,
                IO_STREAM_BUF_SIZE - 5, "small read triggers refill");
    DECREF(instream);

    instream = InStream_open((Obj*)file);
    ivars = InStream_IVARS(instream);
    TEST_INT_EQ(runner, InStream_Read_U8(instream), 'x', "Read_U8");
    InStream_Seek(instream, 1023);
    TEST_INT_EQ(runner, (long)FileWindow_IVARS(ivars->window)->offset, 0,
                "no unnecessary refill on Seek");
    TEST_INT_EQ(runner, (long)InStream_Tell(instream), 1023, "Seek/Tell");
    TEST_INT_EQ(runner, InStream_Read_U8(instream), 'y',
                "correct data after in-buffer Seek()");
    TEST_INT_EQ(runner, InStream_Read_U8(instream), 'z', "automatic Refill");
    TEST_TRUE(runner, (FileWindow_IVARS(ivars->window)->offset != 0),
              "refilled");

    DECREF(instream);
    DECREF(outstream);
    DECREF(file);
}
コード例 #9
0
ファイル: Freezer.c プロジェクト: rectang/lucy
String*
Freezer_deserialize_string(String *string, InStream *instream) {
    size_t size = InStream_Read_C32(instream);
    if (size == SIZE_MAX) {
        THROW(ERR, "Can't deserialize SIZE_MAX bytes");
    }
    char *buf = (char*)MALLOCATE(size + 1);
    InStream_Read_Bytes(instream, buf, size);
    buf[size] = '\0';
    if (!StrHelp_utf8_valid(buf, size)) {
        THROW(ERR, "Attempt to deserialize invalid UTF-8");
    }
    return Str_init_steal_trusted_utf8(string, buf, size);
}
コード例 #10
0
ファイル: DocReader.c プロジェクト: kidaa/lucy
void
DefDocReader_Read_Record_IMP(DefaultDocReader *self, ByteBuf *buffer,
                             int32_t doc_id) {
    DefaultDocReaderIVARS *const ivars = DefDocReader_IVARS(self);

    // Find start and length of variable length record.
    InStream_Seek(ivars->ix_in, (int64_t)doc_id * 8);
    int64_t start = InStream_Read_I64(ivars->ix_in);
    int64_t end   = InStream_Read_I64(ivars->ix_in);
    size_t size  = (size_t)(end - start);

    // Read in the record.
    char *buf = BB_Grow(buffer, size);
    InStream_Seek(ivars->dat_in, start);
    InStream_Read_Bytes(ivars->dat_in, buf, size);
    BB_Set_Size(buffer, size);
}
コード例 #11
0
ファイル: HighlightReader.c プロジェクト: pavansondur/lucy
void
DefHLReader_read_record(DefaultHighlightReader *self, int32_t doc_id,
                        ByteBuf *target) {
    InStream *dat_in = self->dat_in;
    InStream *ix_in  = self->ix_in;

    InStream_Seek(ix_in, doc_id * 8);

    // Copy the whole record.
    int64_t  filepos = InStream_Read_I64(ix_in);
    int64_t  end     = InStream_Read_I64(ix_in);
    size_t   size    = (size_t)(end - filepos);
    char    *buf     = BB_Grow(target, size);
    InStream_Seek(dat_in, filepos);
    InStream_Read_Bytes(dat_in, buf, size);
    BB_Set_Size(target, size);
}
コード例 #12
0
ファイル: Freezer.c プロジェクト: rectang/lucy
Hash*
Freezer_deserialize_hash(Hash *hash, InStream *instream) {
    uint32_t size = InStream_Read_C32(instream);

    Hash_init(hash, size);

    while (size--) {
        uint32_t len = InStream_Read_C32(instream);
        char *key_buf = (char*)MALLOCATE(len + 1);
        InStream_Read_Bytes(instream, key_buf, len);
        key_buf[len] = '\0';
        String *key = Str_new_steal_utf8(key_buf, len);
        Hash_Store(hash, key, THAW(instream));
        DECREF(key);
    }

    return hash;
}
コード例 #13
0
ファイル: TestIOChunks.c プロジェクト: kstarsinic/lucy
static void
test_Read_Write_Bytes(TestBatchRunner *runner) {
    RAMFile    *file      = RAMFile_new(NULL, false);
    OutStream  *outstream = OutStream_open((Obj*)file);
    InStream   *instream;
    char        buf[4];

    OutStream_Write_Bytes(outstream, "foo", 4);
    OutStream_Close(outstream);

    instream = InStream_open((Obj*)file);
    InStream_Read_Bytes(instream, buf, 4);
    TEST_TRUE(runner, strcmp(buf, "foo") == 0, "Read_Bytes Write_Bytes");

    DECREF(instream);
    DECREF(outstream);
    DECREF(file);
}
コード例 #14
0
void
DefDocReader_read_record(DefaultDocReader *self, ByteBuf *buffer,
                         i32_t doc_id)
{
    i64_t start;
    i64_t end;
    i32_t size;

    /* Find start and length of variable length record. */
    InStream_Seek(self->ix_in, (i64_t)doc_id * 8);
    start = InStream_Read_U64(self->ix_in);
    end   = InStream_Read_U64(self->ix_in);
    size  = end - start;

    /* Read in the record. */
    BB_Grow(buffer, size);
    InStream_Seek(self->dat_in, start);
    InStream_Read_Bytes(self->dat_in, buffer->ptr, size);
    BB_Set_Size(buffer, size);
}
コード例 #15
0
ファイル: OutStream.c プロジェクト: kidaa/lucy
void
OutStream_Absorb_IMP(OutStream *self, InStream *instream) {
    OutStreamIVARS *const ivars = OutStream_IVARS(self);
    char buf[IO_STREAM_BUF_SIZE];
    int64_t bytes_left = InStream_Length(instream);

    // Read blocks of content into an intermediate buffer, than write them to
    // the OutStream.
    //
    // TODO: optimize by utilizing OutStream's buffer directly, while still
    // not flushing too frequently and keeping code complexity under control.
    OutStream_Grow(self, OutStream_Tell(self) + bytes_left);
    while (bytes_left) {
        const size_t bytes_this_iter = bytes_left < IO_STREAM_BUF_SIZE
                                       ? (size_t)bytes_left
                                       : IO_STREAM_BUF_SIZE;
        InStream_Read_Bytes(instream, buf, bytes_this_iter);
        SI_write_bytes(self, ivars, buf, bytes_this_iter);
        bytes_left -= bytes_this_iter;
    }
}
コード例 #16
0
ファイル: DocReader.c プロジェクト: hernan604/lucy
HitDoc*
DefDocReader_Fetch_Doc_IMP(DefaultDocReader *self, int32_t doc_id) {
    DefaultDocReaderIVARS *const ivars = DefDocReader_IVARS(self);
    Schema   *const schema = ivars->schema;
    InStream *const dat_in = ivars->dat_in;
    InStream *const ix_in  = ivars->ix_in;
    Hash     *const fields = Hash_new(1);
    int64_t   start;
    uint32_t  num_fields;
    uint32_t  field_name_cap = 31;
    char     *field_name = (char*)MALLOCATE(field_name_cap + 1);

    // Get data file pointer from index, read number of fields.
    InStream_Seek(ix_in, (int64_t)doc_id * 8);
    start = InStream_Read_U64(ix_in);
    InStream_Seek(dat_in, start);
    num_fields = InStream_Read_C32(dat_in);

    // Decode stored data and build up the doc field by field.
    while (num_fields--) {
        uint32_t        field_name_len;
        Obj       *value;
        FieldType *type;

        // Read field name.
        field_name_len = InStream_Read_C32(dat_in);
        if (field_name_len > field_name_cap) {
            field_name_cap = field_name_len;
            field_name     = (char*)REALLOCATE(field_name,
                                                    field_name_cap + 1);
        }
        InStream_Read_Bytes(dat_in, field_name, field_name_len);

        // Find the Field's FieldType.
        StackString *field_name_str
            = SSTR_WRAP_UTF8(field_name, field_name_len);
        type = Schema_Fetch_Type(schema, (String*)field_name_str);

        // Read the field value.
        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                    uint32_t value_len = InStream_Read_C32(dat_in);
                    char *buf = (char*)MALLOCATE(value_len + 1);
                    InStream_Read_Bytes(dat_in, buf, value_len);
                    buf[value_len] = '\0'; 
                    value = (Obj*)Str_new_steal_utf8(buf, value_len);
                    break;
                }
            case FType_BLOB: {
                    uint32_t value_len = InStream_Read_C32(dat_in);
                    char *buf = (char*)MALLOCATE(value_len);
                    InStream_Read_Bytes(dat_in, buf, value_len);
                    value = (Obj*)BB_new_steal_bytes(
                                buf, value_len, value_len);
                    break;
                }
            case FType_FLOAT32:
                value = (Obj*)Float32_new(
                                InStream_Read_F32(dat_in));
                break;
            case FType_FLOAT64:
                value = (Obj*)Float64_new(
                                InStream_Read_F64(dat_in));
                break;
            case FType_INT32:
                value = (Obj*)Int32_new(
                                (int32_t)InStream_Read_C32(dat_in));
                break;
            case FType_INT64:
                value = (Obj*)Int64_new(
                                (int64_t)InStream_Read_C64(dat_in));
                break;
            default:
                value = NULL;
                THROW(ERR, "Unrecognized type: %o", type);
        }

        // Store the value.
        Hash_Store_Utf8(fields, field_name, field_name_len, value);
    }
    FREEMEM(field_name);

    HitDoc *retval = HitDoc_new(fields, doc_id, 0.0);
    DECREF(fields);
    return retval;
}