// Verify that InStream_Seek and InStream_Tell cooperate correctly with the
// buffering machinery, using a 12 GB mock file so seek targets exceed
// 32-bit offsets.  NOTE(review): buffer pointers are compared against
// `((char*)NULL) + offset` because MockFileHandle appears to hand back
// window base addresses derived from the file position — confirm against
// MockFileHandle's implementation.
static void test_Seek_and_Tell(TestBatchRunner *runner) {
    int64_t gb1  = INT64_C(0x40000000);   // 1 GB
    int64_t gb3  = gb1 * 3;
    int64_t gb6  = gb1 * 6;
    int64_t gb12 = gb1 * 12;
    FileHandle *fh = (FileHandle*)MockFileHandle_new(NULL, gb12);
    InStream *instream = InStream_open((Obj*)fh);
    InStreamIVARS *const ivars = InStream_IVARS(instream);

    InStream_Buf(instream, 10000);
    TEST_TRUE(runner, ivars->limit == ((char*)NULL) + 10000,
              "InStream_Buf sets limit");

    // Seek forward past the end of the current buffer window: buf/limit
    // are invalidated and the position is tracked only in the window.
    InStream_Seek(instream, gb6);
    TEST_TRUE(runner, InStream_Tell(instream) == gb6,
              "Tell after seek forwards outside buffer");
    TEST_TRUE(runner, ivars->buf == NULL,
              "Seek forwards outside buffer sets buf to NULL");
    TEST_TRUE(runner, ivars->limit == NULL,
              "Seek forwards outside buffer sets limit to NULL");
    TEST_TRUE(runner, FileWindow_IVARS(ivars->window)->offset == gb6,
              "Seek forwards outside buffer tracks pos in window offset");

    InStream_Buf(instream, (size_t)gb1);
    TEST_TRUE(runner, ivars->limit == ((char*)NULL) + gb1,
              "InStream_Buf sets limit");

    // Seek forward but stay inside the freshly filled 1 GB buffer: only
    // buf moves; limit is untouched.
    InStream_Seek(instream, gb6 + 10);
    TEST_TRUE(runner, InStream_Tell(instream) == gb6 + 10,
              "Tell after seek forwards within buffer");
    TEST_TRUE(runner, ivars->buf == ((char*)NULL) + 10,
              "Seek within buffer sets buf");
    TEST_TRUE(runner, ivars->limit == ((char*)NULL) + gb1,
              "Seek within buffer leaves limit alone");

    // Seek backward within the same buffer window.
    InStream_Seek(instream, gb6 + 1);
    TEST_TRUE(runner, InStream_Tell(instream) == gb6 + 1,
              "Tell after seek backwards within buffer");
    TEST_TRUE(runner, ivars->buf == ((char*)NULL) + 1,
              "Seek backwards within buffer sets buf");
    TEST_TRUE(runner, ivars->limit == ((char*)NULL) + gb1,
              "Seek backwards within buffer leaves limit alone");

    // Seek backward past the start of the buffer window: buf/limit are
    // invalidated again.
    InStream_Seek(instream, gb3);
    TEST_TRUE(runner, InStream_Tell(instream) == gb3,
              "Tell after seek backwards outside buffer");
    TEST_TRUE(runner, ivars->buf == NULL,
              "Seek backwards outside buffer sets buf to NULL");
    TEST_TRUE(runner, ivars->limit == NULL,
              "Seek backwards outside buffer sets limit to NULL");
    TEST_TRUE(runner, FileWindow_IVARS(ivars->window)->offset == gb3,
              "Seek backwards outside buffer tracks pos in window offset");

    DECREF(instream);
    DECREF(fh);
}
LexIndex* LexIndex_init(LexIndex *self, Schema *schema, Folder *folder, Segment *segment, const CharBuf *field) { i32_t field_num = Seg_Field_Num(segment, field); CharBuf *seg_name = Seg_Get_Name(segment); CharBuf *ixix_file = CB_newf("%o/lexicon-%i32.ixix", seg_name, field_num); CharBuf *ix_file = CB_newf("%o/lexicon-%i32.ix", seg_name, field_num); Architecture *arch = Schema_Get_Architecture(schema); /* Init. */ self->term = ViewCB_new_from_trusted_utf8(NULL, 0); self->tinfo = TInfo_new(0,0,0,0); self->tick = 0; /* Derive */ self->field_type = Schema_Fetch_Type(schema, field); if (!self->field_type) { CharBuf *mess = MAKE_MESS("Unknown field: '%o'", field); DECREF(ix_file); DECREF(ixix_file); DECREF(self); Err_throw_mess(mess); } INCREF(self->field_type); self->ixix_in = Folder_Open_In(folder, ixix_file); self->ix_in = Folder_Open_In(folder, ix_file); if (!self->ixix_in || !self->ix_in) { CharBuf *mess = MAKE_MESS("Can't open either %o or %o", ix_file, ixix_file); DECREF(ix_file); DECREF(ixix_file); DECREF(self); Err_throw_mess(mess); } self->index_interval = Arch_Index_Interval(arch); self->skip_interval = Arch_Skip_Interval(arch); self->size = (i32_t)(InStream_Length(self->ixix_in) / sizeof(i64_t)); self->offsets = (i64_t*)InStream_Buf(self->ixix_in, (size_t)InStream_Length(self->ixix_in)); self->data = InStream_Buf(self->ix_in, InStream_Length(self->ix_in)); self->limit = self->data + InStream_Length(self->ix_in); DECREF(ixix_file); DECREF(ix_file); return self; }
NumericSortCache* NumSortCache_init(NumericSortCache *self, const CharBuf *field, FieldType *type, int32_t cardinality, int32_t doc_max, int32_t null_ord, int32_t ord_width, InStream *ord_in, InStream *dat_in) { // Validate. if (!type || !FType_Sortable(type) || !FType_Is_A(type, NUMERICTYPE)) { DECREF(self); THROW(ERR, "'%o' isn't a sortable NumericType field", field); } // Mmap ords and super-init. int64_t ord_len = InStream_Length(ord_in); void *ords = InStream_Buf(ord_in, (size_t)ord_len); SortCache_init((SortCache*)self, field, type, ords, cardinality, doc_max, null_ord, ord_width); // Assign. self->ord_in = (InStream*)INCREF(ord_in); self->dat_in = (InStream*)INCREF(dat_in); // Validate ord file length. double BITS_PER_BYTE = 8.0; double docs_per_byte = BITS_PER_BYTE / self->ord_width; double max_ords = ord_len * docs_per_byte; if (max_ords < self->doc_max + 1) { DECREF(self); THROW(ERR, "Conflict between ord count max %f64 and doc_max %i32 for " "field %o", max_ords, self->doc_max, field); } ABSTRACT_CLASS_CHECK(self, NUMERICSORTCACHE); return self; }
TextSortCache* TextSortCache_init(TextSortCache *self, String *field, FieldType *type, int32_t cardinality, int32_t doc_max, int32_t null_ord, int32_t ord_width, InStream *ord_in, InStream *ix_in, InStream *dat_in) { // Validate. if (!type || !FType_Sortable(type)) { DECREF(self); THROW(ERR, "'%o' isn't a sortable field", field); } // Memory map ords and super-init. int64_t ord_len = InStream_Length(ord_in); const void *ords = InStream_Buf(ord_in, (size_t)ord_len); SortCache_init((SortCache*)self, field, type, ords, cardinality, doc_max, null_ord, ord_width); TextSortCacheIVARS *const ivars = TextSortCache_IVARS(self); // Validate ords file length. double bytes_per_doc = ivars->ord_width / 8.0; double max_ords = ord_len / bytes_per_doc; if (max_ords < ivars->doc_max + 1) { WARN("ORD WIDTH: %i32 %i32", ord_width, ivars->ord_width); THROW(ERR, "Conflict between ord count max %f64 and doc_max %i32 for " "field %o", max_ords, doc_max, field); } // Assign. ivars->ord_in = (InStream*)INCREF(ord_in); ivars->ix_in = (InStream*)INCREF(ix_in); ivars->dat_in = (InStream*)INCREF(dat_in); return self; }
// Initialize a LexIndex over the memory-mapped lexicon index for one
// field of a segment: the .ixix file supplies an array of i64 offsets
// into the .ix term-data file.  Throws (after releasing the filename
// Strings and decref'ing self) if the field is unknown or either file
// fails to open.
LexIndex*
LexIndex_init(LexIndex *self, Schema *schema, Folder *folder,
              Segment *segment, String *field) {
    int32_t field_num = Seg_Field_Num(segment, field);
    String *seg_name  = Seg_Get_Name(segment);
    String *ixix_file = Str_newf("%o/lexicon-%i32.ixix", seg_name, field_num);
    String *ix_file   = Str_newf("%o/lexicon-%i32.ix", seg_name, field_num);
    Architecture *arch = Schema_Get_Architecture(schema);

    // Init.
    Lex_init((Lexicon*)self, field);
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    ivars->tinfo = TInfo_new(0);
    ivars->tick  = 0;

    // Derive.
    ivars->field_type = Schema_Fetch_Type(schema, field);
    if (!ivars->field_type) {
        String *mess = MAKE_MESS("Unknown field: '%o'", field);
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        Err_throw_mess(ERR, mess);
    }
    // Take our own refcount on the type fetched from the Schema.
    ivars->field_type = (FieldType*)INCREF(ivars->field_type);
    ivars->term_stepper = FType_Make_Term_Stepper(ivars->field_type);
    ivars->ixix_in = Folder_Open_In(folder, ixix_file);
    if (!ivars->ixix_in) {
        // Preserve the Folder's error and re-raise it after cleanup.
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->ix_in = Folder_Open_In(folder, ix_file);
    if (!ivars->ix_in) {
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->index_interval = Arch_Index_Interval(arch);
    ivars->skip_interval  = Arch_Skip_Interval(arch);
    // Each .ixix entry is one int64_t offset.
    ivars->size = (int32_t)(InStream_Length(ivars->ixix_in)
                            / sizeof(int64_t));
    ivars->offsets = (const int64_t*)InStream_Buf(
                         ivars->ixix_in,
                         (size_t)InStream_Length(ivars->ixix_in));

    DECREF(ixix_file);
    DECREF(ix_file);

    return self;
}
// Read one posting record from `instream`: a delta-encoded doc id with a
// freq==1 flag folded into its low bit, an optional explicit freq, one
// boost/norm byte, and `freq` delta-encoded positions.
void
ScorePost_Read_Record_IMP(ScorePosting *self, InStream *instream) {
    ScorePostingIVARS *const ivars = ScorePost_IVARS(self);
    uint32_t position = 0;
    // Worst case for the record header: two C32s plus the norm byte.
    const size_t max_start_bytes = (C32_MAX_BYTES * 2) + 1;
    const char *buf = InStream_Buf(instream, max_start_bytes);
    // Low bit of the doc code flags freq == 1; the rest is the doc delta.
    const uint32_t doc_code = NumUtil_decode_c32(&buf);
    const uint32_t doc_delta = doc_code >> 1;

    // Apply delta doc and retrieve freq.
    ivars->doc_id += doc_delta;
    if (doc_code & 1) {
        ivars->freq = 1;
    }
    else {
        ivars->freq = NumUtil_decode_c32(&buf);
    }

    // Decode boost/norm byte.
    ivars->weight = ivars->norm_decoder[*(uint8_t*)buf];
    buf++;

    // Read positions, growing the prox array if this record needs more
    // slots than we've allocated so far.
    uint32_t num_prox = ivars->freq;
    if (num_prox > ivars->prox_cap) {
        ivars->prox = (uint32_t*)REALLOCATE(
                          ivars->prox, num_prox * sizeof(uint32_t));
        ivars->prox_cap = num_prox;
    }
    uint32_t *positions = ivars->prox;

    // Commit the bytes consumed so far, then request enough buffer for
    // the worst-case encoding of all remaining positions.
    InStream_Advance_Buf(instream, buf);
    buf = InStream_Buf(instream, num_prox * C32_MAX_BYTES);
    while (num_prox--) {
        // Positions are stored as deltas from the previous position.
        position += NumUtil_decode_c32(&buf);
        *positions++ = position;
    }

    InStream_Advance_Buf(instream, buf);
}
// Exercise InStream_Buf's sizing contract: small requests are rounded up
// to IO_STREAM_BUF_SIZE, requests that fit in the remaining buffer don't
// refill, over-limit requests do, and requests past EOF are clamped to
// the bytes actually left in the file.
static void test_Buf(TestBatchRunner *runner) {
    RAMFile *file = RAMFile_new(NULL, false);
    OutStream *outstream = OutStream_open((Obj*)file);
    // Big enough to span two internal buffers, plus a ragged 5-byte tail.
    size_t size = IO_STREAM_BUF_SIZE * 2 + 5;
    InStream *instream;
    char *buf;

    for (uint32_t i = 0; i < size; i++) {
        OutStream_Write_U8(outstream, 'a');
    }
    OutStream_Close(outstream);

    instream = InStream_open((Obj*)file);
    InStreamIVARS *const ivars = InStream_IVARS(instream);
    buf = InStream_Buf(instream, 5);
    TEST_INT_EQ(runner, ivars->limit - buf, IO_STREAM_BUF_SIZE,
                "Small request bumped up");

    buf += IO_STREAM_BUF_SIZE - 10; // 10 bytes left in buffer.
    InStream_Advance_Buf(instream, buf);

    buf = InStream_Buf(instream, 10);
    TEST_INT_EQ(runner, ivars->limit - buf, 10,
                "Exact request doesn't trigger refill");

    buf = InStream_Buf(instream, 11);
    TEST_INT_EQ(runner, ivars->limit - buf, IO_STREAM_BUF_SIZE,
                "Requesting over limit triggers refill");

    // Ask for far more than remains; the usable span must equal exactly
    // the bytes between the current position and EOF.
    int64_t expected = InStream_Length(instream) - InStream_Tell(instream);
    char *buff = InStream_Buf(instream, 100000);
    int64_t got = PTR_TO_I64(ivars->limit) - PTR_TO_I64(buff);
    TEST_TRUE(runner, got == expected,
              "Requests greater than file size get pared down");

    DECREF(instream);
    DECREF(outstream);
    DECREF(file);
}
/* Read an entire JSON file from `folder` and parse it into a dump object.
 * Returns NULL (with an error frame added) when the file can't be opened
 * or its contents fail to parse; the caller owns the returned Obj. */
Obj*
Json_slurp_json(Folder *folder, const CharBuf *path) {
    InStream *source = Folder_Open_In(folder, path);
    if (source == NULL) {
        ERR_ADD_FRAME(Err_get_error());
        return NULL;
    }

    // Map the whole file into the stream buffer and parse it in one shot.
    size_t num_bytes = (size_t)InStream_Length(source);
    char *contents = InStream_Buf(source, num_bytes);
    Obj *result = S_parse_json(contents, num_bytes);
    InStream_Close(source);
    DECREF(source);

    if (result == NULL) {
        ERR_ADD_FRAME(Err_get_error());
    }
    return result;
}
SortCache* SortCache_init(SortCache *self, Schema *schema, Folder *folder, Segment *segment, i32_t field_num) { CharBuf *field = Seg_Field_Name(segment, field_num); CharBuf *seg_name = Seg_Get_Name(segment); CharBuf *ord_file = CB_newf("%o/sort-%i32.ord", seg_name, field_num); CharBuf *ix_file = CB_newf("%o/sort-%i32.ix", seg_name, field_num); CharBuf *dat_file = CB_newf("%o/sort-%i32.dat", seg_name, field_num); i64_t ord_len, ix_len, dat_len; /* Derive. */ self->doc_max = Seg_Get_Count(segment); self->type = Schema_Fetch_Type(schema, field); if (!self->type || !FType_Sortable(self->type)) { THROW("'%o' isn't a sortable field", field); } /* Open instreams. */ self->ord_in = Folder_Open_In(folder, ord_file); self->ix_in = Folder_Open_In(folder, ix_file); self->dat_in = Folder_Open_In(folder, dat_file); if (!self->ix_in || !self->dat_in || !self->ord_in) { CharBuf *mess = MAKE_MESS("Can't open either %o, %o or %o", ord_file, ix_file, dat_file); DECREF(ord_file); DECREF(ix_file); DECREF(dat_file); Err_throw_mess(mess); } ord_len = InStream_Length(self->ord_in); ix_len = InStream_Length(self->ix_in); dat_len = InStream_Length(self->dat_in); /* Calculate the number of unique values and derive the ord bit width. */ self->num_uniq = (i32_t)(ix_len / 8) - 1; self->width = S_calc_width(self->num_uniq); /* Validate file lengths. */ { double bytes_per_doc = self->width / 8.0; double max_ords = ord_len / bytes_per_doc; if (max_ords < self->doc_max + 1) { THROW("Conflict between ord count max %f64 and doc_max %i32", max_ords, self->doc_max); } } /* Mmap ords, offsets and character data. */ self->ords = InStream_Buf(self->ord_in, (size_t)ord_len); self->offsets = (i64_t*)InStream_Buf(self->ix_in, (size_t)ix_len); self->char_data = InStream_Buf(self->dat_in, dat_len); { char *offs = (char*)self->offsets; self->offsets_limit = (i64_t*)(offs + ix_len); self->char_data_limit = self->char_data + dat_len; } DECREF(ord_file); DECREF(ix_file); DECREF(dat_file); return self; }