/**
 * Deserialize a Blob from `instream`.
 *
 * Reads a C32-encoded byte count, then that many raw bytes into a freshly
 * allocated buffer, and initializes `blob` with the buffer through
 * Blob_init_steal().
 *
 * @param blob      Blob object to initialize.
 * @param instream  Stream positioned at the serialized blob.
 * @return the value returned by Blob_init_steal().
 */
Blob*
Freezer_deserialize_blob(Blob *blob, InStream *instream) {
    const size_t num_bytes = InStream_Read_C32(instream);
    char *const  bytes     = (char*)MALLOCATE(num_bytes);

    InStream_Read_Bytes(instream, bytes, num_bytes);

    // The buffer is handed to Blob_init_steal(); it is not freed here.
    return Blob_init_steal(blob, bytes, num_bytes);
}
/**
 * Fetch the text value associated with ordinal `ord` as a new String.
 *
 * The index stream (`ix_in`) holds one int64_t offset per ordinal; the
 * value's bytes live in the data stream (`dat_in`) between this ordinal's
 * offset and the next non-sentinel offset.
 *
 * @param self  The sort cache.
 * @param ord   Ordinal to look up.
 * @return a new String built from the stored bytes, or NULL if `ord` is the
 *         cache's null ordinal or its index entry is NULL_SENTINEL.
 */
Obj*
TextSortCache_Value_IMP(TextSortCache *self, int32_t ord) {
    TextSortCacheIVARS *const ivars = TextSortCache_IVARS(self);
    if (ord == ivars->null_ord) {
        return NULL;
    }

    // Compute the index position in 64-bit arithmetic.  Multiplying by
    // sizeof() directly would perform the math in size_t, which wraps on
    // platforms where size_t is 32 bits wide.
    InStream_Seek(ivars->ix_in, (int64_t)ord * (int64_t)sizeof(int64_t));
    int64_t offset = InStream_Read_I64(ivars->ix_in);
    if (offset == NULL_SENTINEL) {
        return NULL;
    }
    else {
        // Convert before incrementing so that `ord + 1` can never overflow
        // a signed int.
        uint32_t next_ord = (uint32_t)ord + 1;
        int64_t  next_offset;

        // Skip forward over sentinel entries until an entry with a real
        // offset is found; that offset marks the end of this value.
        while (1) {
            InStream_Seek(ivars->ix_in,
                          (int64_t)next_ord * (int64_t)sizeof(int64_t));
            next_offset = InStream_Read_I64(ivars->ix_in);
            if (next_offset != NULL_SENTINEL) { break; }
            next_ord++;
        }

        // Read character data into String.
        // NOTE(review): a negative (next_offset - offset) is not rejected
        // here -- confirm the index file guarantees ascending offsets.
        size_t len = (size_t)(next_offset - offset);
        char *ptr = (char*)MALLOCATE(len + 1);
        InStream_Seek(ivars->dat_in, offset);
        InStream_Read_Bytes(ivars->dat_in, ptr, len);
        ptr[len] = '\0';
        return (Obj*)Str_new_steal_utf8(ptr, len);
    }
}
/**
 * Read the entire contents of the file at `path` into a new ByteBuf.
 *
 * The returned buffer is allocated with one extra byte, which is set to
 * '\0'; the ByteBuf's size is the file length and its capacity is the file
 * length plus one.
 *
 * @param self  Folder to open the file from.
 * @param path  Path of the file within the folder.
 * @return a new ByteBuf holding the file contents.
 *
 * Rethrows the current error if the file cannot be opened, and throws if
 * the file length is not smaller than SIZE_MAX.
 */
ByteBuf*
Folder_slurp_file(Folder *self, const CharBuf *path) {
    InStream *instream = Folder_Open_In(self, path);

    if (instream == NULL) {
        RETHROW(INCREF(Err_get_error()));
        return NULL;
    }

    const uint64_t file_len = InStream_Length(instream);
    if (file_len >= SIZE_MAX) {
        // Release the stream before reporting the failure.
        InStream_Close(instream);
        DECREF(instream);
        THROW(ERR, "File %o is too big to slurp (%u64 bytes)", path,
              file_len);
        return NULL;
    }

    // Pull the whole file into a NUL-terminated heap buffer.
    const size_t num_bytes = (size_t)file_len;
    char *contents = (char*)MALLOCATE(num_bytes + 1);
    InStream_Read_Bytes(instream, contents, num_bytes);
    contents[num_bytes] = '\0';

    ByteBuf *slurped = BB_new_steal_bytes(contents, num_bytes, num_bytes + 1);
    InStream_Close(instream);
    DECREF(instream);
    return slurped;
}
/**
 * Deserialize a Hash from `instream`.
 *
 * Stream layout as consumed here: a C32 total entry count, a C32 count of
 * entries with CharBuf keys, then the CharBuf-keyed entries (C32 key
 * length, key bytes, frozen value), then the remaining entries (frozen key,
 * frozen value).
 *
 * @param self      Hash to initialize and populate.
 * @param instream  Stream positioned at the serialized hash.
 * @return `self`.
 */
Hash*
Hash_deserialize(Hash *self, InStream *instream) {
    const uint32_t total_entries = InStream_Read_C32(instream);
    const uint32_t charbuf_keyed = InStream_Read_C32(instream);
    const uint32_t other_keyed   = total_entries - charbuf_keyed;

    // A single scratch CharBuf is reused for every CharBuf key; it is only
    // created when there is at least one such key.
    CharBuf *scratch_key = NULL;
    if (charbuf_keyed) {
        scratch_key = CB_new(0);
    }

    Hash_init(self, total_entries);

    // Entries whose keys are CharBufs.
    for (uint32_t i = 0; i < charbuf_keyed; i++) {
        const uint32_t key_len = InStream_Read_C32(instream);
        char *dest = CB_Grow(scratch_key, key_len);
        InStream_Read_Bytes(instream, dest, key_len);
        dest[key_len] = '\0';
        CB_Set_Size(scratch_key, key_len);
        Hash_Store(self, (Obj*)scratch_key, THAW(instream));
    }
    DECREF(scratch_key);

    // Entries whose keys are arbitrary frozen objects.
    for (uint32_t i = 0; i < other_keyed; i++) {
        Obj *thawed_key = THAW(instream);
        Hash_Store(self, thawed_key, THAW(instream));
        DECREF(thawed_key);
    }

    return self;
}
/**
 * Load the text value for ordinal `ord` into the caller-supplied `blank`.
 *
 * The index stream (`ix_in`) holds one int64_t offset per ordinal; the
 * value's bytes live in the data stream (`dat_in`) between this ordinal's
 * offset and the next non-sentinel offset.
 *
 * @param self   The sort cache.
 * @param ord    Ordinal to look up.
 * @param blank  Object to fill; certified to be a CHARBUF before use.
 * @return `blank` filled with the value, or NULL if `ord` is the cache's
 *         null ordinal or its index entry is NULL_SENTINEL.
 */
Obj*
TextSortCache_value(TextSortCache *self, int32_t ord, Obj *blank) {
    if (ord == self->null_ord) {
        return NULL;
    }

    // Compute the index position in 64-bit arithmetic.  Multiplying by
    // sizeof() directly would perform the math in size_t, which wraps on
    // platforms where size_t is 32 bits wide.
    InStream_Seek(self->ix_in, (int64_t)ord * (int64_t)sizeof(int64_t));
    int64_t offset = InStream_Read_I64(self->ix_in);
    if (offset == NULL_SENTINEL) {
        return NULL;
    }
    else {
        // Convert before incrementing so that `ord + 1` can never overflow
        // a signed int.
        uint32_t next_ord = (uint32_t)ord + 1;
        int64_t  next_offset;

        // Skip forward over sentinel entries until an entry with a real
        // offset is found; that offset marks the end of this value.
        while (1) {
            InStream_Seek(self->ix_in,
                          (int64_t)next_ord * (int64_t)sizeof(int64_t));
            next_offset = InStream_Read_I64(self->ix_in);
            if (next_offset != NULL_SENTINEL) { break; }
            next_ord++;
        }

        // Read character data into CharBuf.
        CERTIFY(blank, CHARBUF);
        int64_t len = next_offset - offset;
        char *ptr = CB_Grow((CharBuf*)blank, (size_t)len);
        InStream_Seek(self->dat_in, offset);
        InStream_Read_Bytes(self->dat_in, ptr, (size_t)len);
        ptr[len] = '\0';
        CB_Set_Size((CharBuf*)blank, (size_t)len);
    }

    return blank;
}
/**
 * Read a complete ("key frame") term text from `instream` into the
 * stepper's value.
 *
 * The stream supplies a C32 byte length followed by that many bytes.  The
 * bytes replace the contents of `self->value`, which is created on first
 * use.  Throws if the bytes are not valid UTF-8.
 *
 * @param self      The term stepper.
 * @param instream  Stream positioned at the key frame.
 */
void
TextTermStepper_read_key_frame(TextTermStepper *self, InStream *instream) {
    const uint32_t num_bytes = InStream_Read_C32(instream);

    // Make sure there is a CharBuf to read into, with enough room.
    if (!self->value) {
        self->value = (Obj*)CB_new(num_bytes);
    }
    CharBuf *const term_text = (CharBuf*)self->value;
    char *const    dest      = CB_Grow(term_text, num_bytes);

    // Pull in the text and record its size before validating it.
    InStream_Read_Bytes(instream, dest, num_bytes);
    CB_Set_Size(term_text, num_bytes);
    if (!StrHelp_utf8_valid(dest, num_bytes)) {
        THROW(ERR, "Invalid UTF-8 sequence in '%o' at byte %i64",
              InStream_Get_Filename(instream),
              InStream_Tell(instream) - num_bytes);
    }

    // Terminate the buffer with a NUL byte.
    dest[num_bytes] = '\0';
}
/**
 * Read a delta-encoded term text from `instream` and apply it to the
 * stepper's current value.
 *
 * The stream supplies two C32 values -- the number of leading bytes to keep
 * from the current value (`text_overlap`) and the number of new trailing
 * bytes (`finish_chars_len`) -- followed by the trailing bytes themselves.
 * After the update the cached `string` ivar is released and reset to NULL.
 *
 * Throws if the combined length cannot be represented in 32 bits or if the
 * resulting text is not valid UTF-8.
 *
 * @param self      The term stepper.
 * @param instream  Stream positioned at the delta record.
 */
void
TextTermStepper_Read_Delta_IMP(TextTermStepper *self, InStream *instream) {
    TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
    const uint32_t text_overlap     = InStream_Read_C32(instream);
    const uint32_t finish_chars_len = InStream_Read_C32(instream);

    // Reject lengths whose sum wraps around.  Without this check a wrapped
    // total would size the buffer too small while the read below still
    // writes `finish_chars_len` bytes at `ptr + text_overlap`.
    if (finish_chars_len > UINT32_MAX - text_overlap) {
        THROW(ERR, "Term text length overflows in '%o' at byte %i64",
              InStream_Get_Filename(instream), InStream_Tell(instream));
    }
    const uint32_t total_text_len = text_overlap + finish_chars_len;

    // Allocate space.
    // NOTE(review): `text_overlap` is not checked against the current size
    // of the value here -- confirm callers/format guarantee it is in range.
    CharBuf *charbuf = (CharBuf*)ivars->value;
    char *ptr = CB_Grow(charbuf, total_text_len);

    // Set the value text: keep the first `text_overlap` bytes and append
    // the new tail after them.
    InStream_Read_Bytes(instream, ptr + text_overlap, finish_chars_len);
    CB_Set_Size(charbuf, total_text_len);
    if (!StrHelp_utf8_valid(ptr, total_text_len)) {
        THROW(ERR, "Invalid UTF-8 sequence in '%o' at byte %i64",
              InStream_Get_Filename(instream),
              InStream_Tell(instream) - finish_chars_len);
    }

    // Null-terminate.
    ptr[total_text_len] = '\0';

    // Invalidate string.
    DECREF(ivars->string);
    ivars->string = NULL;
}
/**
 * Exercise InStream buffer filling: Refill(), Fill(), a small Read_Bytes(),
 * and Seek()/Tell() behavior within and beyond the buffered window.
 *
 * The backing RAMFile is written with 1023 'x' bytes followed by 'y' and
 * 'z'.  Each scenario opens its own InStream on that file.
 */
static void
test_refill(TestBatchRunner *runner) {
    RAMFile   *file      = RAMFile_new(NULL, false);
    OutStream *outstream = OutStream_open((Obj*)file);

    // Build the fixture: 1023 x's, then "yz".
    int32_t written = 0;
    while (written < 1023) {
        OutStream_Write_U8(outstream, 'x');
        written++;
    }
    OutStream_Write_U8(outstream, 'y');
    OutStream_Write_U8(outstream, 'z');
    OutStream_Close(outstream);

    // Scenario 1: Refill() from a standing start.
    {
        InStream      *in       = InStream_open((Obj*)file);
        InStreamIVARS *in_ivars = InStream_IVARS(in);
        InStream_Refill(in);
        TEST_INT_EQ(runner, in_ivars->limit - in_ivars->buf,
                    IO_STREAM_BUF_SIZE, "Refill");
        TEST_INT_EQ(runner, (long)InStream_Tell(in), 0,
                    "Correct file pos after standing-start Refill()");
        DECREF(in);
    }

    // Scenario 2: Fill() with an explicit amount.
    {
        InStream      *in       = InStream_open((Obj*)file);
        InStreamIVARS *in_ivars = InStream_IVARS(in);
        InStream_Fill(in, 30);
        TEST_INT_EQ(runner, in_ivars->limit - in_ivars->buf, 30, "Fill()");
        TEST_INT_EQ(runner, (long)InStream_Tell(in), 0,
                    "Correct file pos after standing-start Fill()");
        DECREF(in);
    }

    // Scenario 3: a small read on a fresh stream.
    {
        char           sink[5];
        InStream      *in       = InStream_open((Obj*)file);
        InStreamIVARS *in_ivars = InStream_IVARS(in);
        InStream_Read_Bytes(in, sink, 5);
        TEST_INT_EQ(runner, in_ivars->limit - in_ivars->buf,
                    IO_STREAM_BUF_SIZE - 5, "small read triggers refill");
        DECREF(in);
    }

    // Scenario 4: Seek() inside the window, then read across its end.
    {
        InStream      *in       = InStream_open((Obj*)file);
        InStreamIVARS *in_ivars = InStream_IVARS(in);
        TEST_INT_EQ(runner, InStream_Read_U8(in), 'x', "Read_U8");
        InStream_Seek(in, 1023);
        TEST_INT_EQ(runner, (long)FileWindow_IVARS(in_ivars->window)->offset,
                    0, "no unnecessary refill on Seek");
        TEST_INT_EQ(runner, (long)InStream_Tell(in), 1023, "Seek/Tell");
        TEST_INT_EQ(runner, InStream_Read_U8(in), 'y',
                    "correct data after in-buffer Seek()");
        TEST_INT_EQ(runner, InStream_Read_U8(in), 'z', "automatic Refill");
        TEST_TRUE(runner, (FileWindow_IVARS(in_ivars->window)->offset != 0),
                  "refilled");
        DECREF(in);
    }

    DECREF(outstream);
    DECREF(file);
}
/**
 * Deserialize a String from `instream`.
 *
 * Reads a C32-encoded byte count and that many bytes into a freshly
 * allocated, NUL-terminated buffer, validates the bytes as UTF-8, and
 * initializes `string` with the buffer via Str_init_steal_trusted_utf8().
 *
 * Throws if the byte count equals SIZE_MAX (the +1 for the terminator would
 * not fit) or if the bytes are not valid UTF-8.
 *
 * @param string    String object to initialize.
 * @param instream  Stream positioned at the serialized string.
 * @return the value returned by Str_init_steal_trusted_utf8().
 */
String*
Freezer_deserialize_string(String *string, InStream *instream) {
    const size_t num_bytes = InStream_Read_C32(instream);
    if (num_bytes == SIZE_MAX) {
        THROW(ERR, "Can't deserialize SIZE_MAX bytes");
    }

    // One extra byte for the trailing NUL.
    char *const text = (char*)MALLOCATE(num_bytes + 1);
    InStream_Read_Bytes(instream, text, num_bytes);
    text[num_bytes] = '\0';

    // NOTE(review): `text` is not released on the failure path below --
    // confirm whether that is acceptable when THROW unwinds.
    if (!StrHelp_utf8_valid(text, num_bytes)) {
        THROW(ERR, "Attempt to deserialize invalid UTF-8");
    }

    return Str_init_steal_trusted_utf8(string, text, num_bytes);
}
/**
 * Copy the raw stored record for `doc_id` into `buffer`.
 *
 * The index stream holds one 8-byte offset per document; the record spans
 * the data stream from this document's offset up to the offset stored in
 * the following index entry.
 *
 * @param self    The doc reader.
 * @param buffer  Destination ByteBuf; grown as needed and resized to the
 *                record length.
 * @param doc_id  Document whose record should be read.
 */
void
DefDocReader_Read_Record_IMP(DefaultDocReader *self, ByteBuf *buffer,
                             int32_t doc_id) {
    DefaultDocReaderIVARS *const ivars = DefDocReader_IVARS(self);
    InStream *const ix_in  = ivars->ix_in;
    InStream *const dat_in = ivars->dat_in;

    // Two consecutive index entries bracket the record.
    const int64_t ix_pos = (int64_t)doc_id * 8;
    InStream_Seek(ix_in, ix_pos);
    const int64_t record_start = InStream_Read_I64(ix_in);
    const int64_t record_end   = InStream_Read_I64(ix_in);
    const size_t  record_len   = (size_t)(record_end - record_start);

    // Pull the record bytes straight into the caller's buffer.
    char *const dest = BB_Grow(buffer, record_len);
    InStream_Seek(dat_in, record_start);
    InStream_Read_Bytes(dat_in, dest, record_len);
    BB_Set_Size(buffer, record_len);
}
/**
 * Copy the raw stored highlight record for `doc_id` into `target`.
 *
 * The index stream holds one 8-byte offset per document; the record spans
 * the data stream from this document's offset up to the offset stored in
 * the following index entry.
 *
 * @param self    The highlight reader.
 * @param doc_id  Document whose record should be read.
 * @param target  Destination ByteBuf; grown as needed and resized to the
 *                record length.
 */
void
DefHLReader_read_record(DefaultHighlightReader *self, int32_t doc_id,
                        ByteBuf *target) {
    InStream *dat_in = self->dat_in;
    InStream *ix_in  = self->ix_in;

    // Widen before multiplying: `doc_id * 8` evaluated in 32-bit int
    // overflows once doc_id exceeds INT32_MAX / 8.
    InStream_Seek(ix_in, (int64_t)doc_id * 8);

    // Copy the whole record.
    int64_t filepos = InStream_Read_I64(ix_in);
    int64_t end     = InStream_Read_I64(ix_in);
    size_t  size    = (size_t)(end - filepos);
    char   *buf     = BB_Grow(target, size);
    InStream_Seek(dat_in, filepos);
    InStream_Read_Bytes(dat_in, buf, size);
    BB_Set_Size(target, size);
}
/**
 * Deserialize a Hash from `instream`.
 *
 * Reads a C32 entry count, then for each entry a C32 key length, the key
 * bytes, and a frozen value which is thawed and stored under the key.
 *
 * @param hash      Hash to initialize and populate.
 * @param instream  Stream positioned at the serialized hash.
 * @return `hash`.
 */
Hash*
Freezer_deserialize_hash(Hash *hash, InStream *instream) {
    uint32_t size = InStream_Read_C32(instream);

    Hash_init(hash, size);

    while (size--) {
        // Hold the length in size_t so that `len + 1` is not computed in
        // 32-bit arithmetic, where a length of UINT32_MAX would wrap to a
        // zero-byte allocation.  (Where size_t is itself 32 bits wide the
        // wrap is still possible.)
        const size_t len = InStream_Read_C32(instream);
        char *key_buf = (char*)MALLOCATE(len + 1);
        InStream_Read_Bytes(instream, key_buf, len);
        key_buf[len] = '\0';

        // The key buffer is handed to Str_new_steal_utf8(); the local
        // reference to the resulting String is dropped after the store.
        String *key = Str_new_steal_utf8(key_buf, len);
        Hash_Store(hash, key, THAW(instream));
        DECREF(key);
    }

    return hash;
}
/**
 * Round-trip test: write the 4 bytes of "foo" (including its terminating
 * NUL) through an OutStream, read 4 bytes back through an InStream, and
 * check that the result compares equal to "foo".
 */
static void
test_Read_Write_Bytes(TestBatchRunner *runner) {
    RAMFile *ram_file = RAMFile_new(NULL, false);

    // Write phase.
    OutStream *out = OutStream_open((Obj*)ram_file);
    OutStream_Write_Bytes(out, "foo", 4);
    OutStream_Close(out);

    // Read phase.
    char readback[4];
    InStream *in = InStream_open((Obj*)ram_file);
    InStream_Read_Bytes(in, readback, 4);
    TEST_TRUE(runner, strcmp(readback, "foo") == 0, "Read_Bytes Write_Bytes");

    DECREF(in);
    DECREF(out);
    DECREF(ram_file);
}
/**
 * Copy the raw stored record for `doc_id` into `buffer`.
 *
 * The index stream holds one 8-byte offset per document; the record spans
 * the data stream from this document's offset up to the offset stored in
 * the following index entry.
 *
 * @param self    The doc reader.
 * @param buffer  Destination ByteBuf; grown as needed and resized to the
 *                record length.
 * @param doc_id  Document whose record should be read.
 */
void
DefDocReader_read_record(DefaultDocReader *self, ByteBuf *buffer,
                         i32_t doc_id) {
    i64_t  start;
    i64_t  end;
    size_t size;

    /* Find start and length of variable length record. */
    InStream_Seek(self->ix_in, (i64_t)doc_id * 8);
    start = (i64_t)InStream_Read_U64(self->ix_in);
    end   = (i64_t)InStream_Read_U64(self->ix_in);

    /* Keep the length in size_t rather than a 32-bit signed integer so the
     * 64-bit difference is not truncated before it is used as a byte count.
     */
    size = (size_t)(end - start);

    /* Read in the record. */
    BB_Grow(buffer, size);
    InStream_Seek(self->dat_in, start);
    InStream_Read_Bytes(self->dat_in, buffer->ptr, size);
    BB_Set_Size(buffer, size);
}
/**
 * Append the content of `instream` to this OutStream.
 *
 * The number of bytes copied is InStream_Length(instream); they are read
 * from the stream in chunks of at most IO_STREAM_BUF_SIZE bytes.
 *
 * @param self      Destination stream.
 * @param instream  Source stream.
 */
void
OutStream_Absorb_IMP(OutStream *self, InStream *instream) {
    OutStreamIVARS *const ivars = OutStream_IVARS(self);
    char    chunk[IO_STREAM_BUF_SIZE];
    int64_t remaining = InStream_Length(instream);

    // Read blocks of content into an intermediate buffer, then write them
    // to the OutStream.
    //
    // TODO: optimize by utilizing OutStream's buffer directly, while still
    // not flushing too frequently and keeping code complexity under control.
    OutStream_Grow(self, OutStream_Tell(self) + remaining);

    while (remaining != 0) {
        // Full chunk unless fewer bytes than that are left.
        size_t chunk_len = IO_STREAM_BUF_SIZE;
        if (remaining < IO_STREAM_BUF_SIZE) {
            chunk_len = (size_t)remaining;
        }

        InStream_Read_Bytes(instream, chunk, chunk_len);
        SI_write_bytes(self, ivars, chunk, chunk_len);
        remaining -= chunk_len;
    }
}
/**
 * Load the stored fields of document `doc_id` and return them as a HitDoc.
 *
 * The index stream supplies the document's starting offset in the data
 * stream.  The data stream then supplies a C32 field count followed, for
 * each field, by a C32 name length, the name bytes, and a value whose
 * encoding depends on the primitive type the Schema reports for that field.
 *
 * @param self    The doc reader.
 * @param doc_id  Document to fetch.
 * @return a new HitDoc built from the decoded fields, with score 0.0.
 *
 * Throws if a field's primitive type is not one of the handled types.
 */
HitDoc*
DefDocReader_Fetch_Doc_IMP(DefaultDocReader *self, int32_t doc_id) {
    DefaultDocReaderIVARS *const ivars = DefDocReader_IVARS(self);
    Schema   *const schema = ivars->schema;
    InStream *const dat_in = ivars->dat_in;
    InStream *const ix_in  = ivars->ix_in;
    Hash     *const fields = Hash_new(1);
    int64_t   start;
    uint32_t  num_fields;

    // Buffer sizes are tracked in size_t so that "length + 1" is never
    // computed in 32-bit arithmetic, where a length of UINT32_MAX would
    // wrap to a zero-byte allocation.  (Where size_t is itself 32 bits wide
    // the wrap is still possible.)
    size_t    field_name_cap = 31;
    char     *field_name = (char*)MALLOCATE(field_name_cap + 1);

    // Get data file pointer from index, read number of fields.
    InStream_Seek(ix_in, (int64_t)doc_id * 8);
    start = (int64_t)InStream_Read_U64(ix_in);
    InStream_Seek(dat_in, start);
    num_fields = InStream_Read_C32(dat_in);

    // Decode stored data and build up the doc field by field.
    while (num_fields--) {
        uint32_t   field_name_len;
        Obj       *value;
        FieldType *type;

        // Read field name, enlarging the scratch buffer when required.
        field_name_len = InStream_Read_C32(dat_in);
        if (field_name_len > field_name_cap) {
            field_name_cap = field_name_len;
            field_name = (char*)REALLOCATE(field_name, field_name_cap + 1);
        }
        InStream_Read_Bytes(dat_in, field_name, field_name_len);

        // Find the Field's FieldType.
        StackString *field_name_str
            = SSTR_WRAP_UTF8(field_name, field_name_len);
        type = Schema_Fetch_Type(schema, (String*)field_name_str);

        // Read the field value.
        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                    // Length-prefixed text, stored NUL-terminated in a
                    // buffer handed to the new String.
                    const size_t value_len = InStream_Read_C32(dat_in);
                    char *buf = (char*)MALLOCATE(value_len + 1);
                    InStream_Read_Bytes(dat_in, buf, value_len);
                    buf[value_len] = '\0';
                    value = (Obj*)Str_new_steal_utf8(buf, value_len);
                    break;
                }
            case FType_BLOB: {
                    // Length-prefixed raw bytes handed to the new ByteBuf.
                    const size_t value_len = InStream_Read_C32(dat_in);
                    char *buf = (char*)MALLOCATE(value_len);
                    InStream_Read_Bytes(dat_in, buf, value_len);
                    value = (Obj*)BB_new_steal_bytes(
                                buf, value_len, value_len);
                    break;
                }
            case FType_FLOAT32:
                value = (Obj*)Float32_new(
                            InStream_Read_F32(dat_in));
                break;
            case FType_FLOAT64:
                value = (Obj*)Float64_new(
                            InStream_Read_F64(dat_in));
                break;
            case FType_INT32:
                value = (Obj*)Int32_new(
                            (int32_t)InStream_Read_C32(dat_in));
                break;
            case FType_INT64:
                value = (Obj*)Int64_new(
                            (int64_t)InStream_Read_C64(dat_in));
                break;
            default:
                value = NULL;
                THROW(ERR, "Unrecognized type: %o", type);
        }

        // Store the value.
        Hash_Store_Utf8(fields, field_name, field_name_len, value);
    }
    FREEMEM(field_name);

    // The HitDoc is built from `fields`; the local reference is dropped.
    HitDoc *retval = HitDoc_new(fields, doc_id, 0.0);
    DECREF(fields);
    return retval;
}