/* Check InStream buffering against a RAMFile that holds 1023 'x' bytes
 * followed by a 'y' and a 'z': explicit Refill() and Fill(), the refill
 * caused by a small read, and Seek() both inside and beyond the data that
 * is currently buffered. */
static void
test_refill(TestBatchRunner *runner) {
    RAMFile       *ram_file = RAMFile_new(NULL, false);
    OutStream     *out      = OutStream_open((Obj*)ram_file);
    InStream      *in;
    InStreamIVARS *in_ivars;
    char           five_bytes[5];

    // Fixture: 'x' at offsets 0..1022, 'y' at 1023, 'z' at 1024.
    // NOTE(review): the "automatic Refill" check below presumably relies on
    // IO_STREAM_BUF_SIZE being 1024 so that 'z' sits outside the first
    // buffer -- confirm against the InStream header.
    for (int32_t written = 0; written < 1023; written++) {
        OutStream_Write_U8(out, 'x');
    }
    OutStream_Write_U8(out, 'y');
    OutStream_Write_U8(out, 'z');
    OutStream_Close(out);

    // Scenario 1: explicit Refill() on a freshly opened stream.
    in       = InStream_open((Obj*)ram_file);
    in_ivars = InStream_IVARS(in);
    InStream_Refill(in);
    TEST_INT_EQ(runner, in_ivars->limit - in_ivars->buf, IO_STREAM_BUF_SIZE,
                "Refill");
    TEST_INT_EQ(runner, (long)InStream_Tell(in), 0,
                "Correct file pos after standing-start Refill()");
    DECREF(in);

    // Scenario 2: explicit Fill() with a 30-byte request.
    in       = InStream_open((Obj*)ram_file);
    in_ivars = InStream_IVARS(in);
    InStream_Fill(in, 30);
    TEST_INT_EQ(runner, in_ivars->limit - in_ivars->buf, 30, "Fill()");
    TEST_INT_EQ(runner, (long)InStream_Tell(in), 0,
                "Correct file pos after standing-start Fill()");
    DECREF(in);

    // Scenario 3: a 5-byte read on a fresh stream should pull in a whole
    // buffer and leave all but the 5 consumed bytes available.
    in       = InStream_open((Obj*)ram_file);
    in_ivars = InStream_IVARS(in);
    InStream_Read_Bytes(in, five_bytes, 5);
    TEST_INT_EQ(runner, in_ivars->limit - in_ivars->buf,
                IO_STREAM_BUF_SIZE - 5, "small read triggers refill");
    DECREF(in);

    // Scenario 4: Seek() within the buffered window must not move the
    // window; reading past it must.
    in       = InStream_open((Obj*)ram_file);
    in_ivars = InStream_IVARS(in);
    TEST_INT_EQ(runner, InStream_Read_U8(in), 'x', "Read_U8");
    InStream_Seek(in, 1023);
    TEST_INT_EQ(runner, (long)FileWindow_IVARS(in_ivars->window)->offset, 0,
                "no unnecessary refill on Seek");
    TEST_INT_EQ(runner, (long)InStream_Tell(in), 1023, "Seek/Tell");
    TEST_INT_EQ(runner, InStream_Read_U8(in), 'y',
                "correct data after in-buffer Seek()");
    TEST_INT_EQ(runner, InStream_Read_U8(in), 'z', "automatic Refill");
    TEST_TRUE(runner, (FileWindow_IVARS(in_ivars->window)->offset != 0),
              "refilled");
    DECREF(in);

    DECREF(out);
    DECREF(ram_file);
}
/* Check that InStream_Clone() yields a stream with the same filename,
 * length and position as its source, and that InStream_Reopen() yields a
 * stream that carries the new filename and treats the supplied offset and
 * length as its own origin and extent. The backing file holds the 26
 * bytes 'a' + 0 through 'a' + 25. */
static void
test_Clone_and_Reopen(TestBatchRunner *runner) {
    String        *name_foo = SSTR_WRAP_C("foo");
    String        *name_bar = SSTR_WRAP_C("bar");
    RAMFile       *ram_file = RAMFile_new(NULL, false);
    OutStream     *out      = OutStream_open((Obj*)ram_file);
    RAMFileHandle *handle;
    InStream      *original;
    InStream      *twin;
    InStream      *window;

    // Fixture: write 'a' + 0 .. 'a' + 25.
    for (uint8_t offset = 0; offset < 26; offset++) {
        OutStream_Write_U8(out, 'a' + offset);
    }
    OutStream_Close(out);

    // Open the source stream under the name "foo" and advance one byte so
    // the clone has a non-zero position to inherit.
    handle   = RAMFH_open(name_foo, FH_READ_ONLY, ram_file);
    original = InStream_open((Obj*)handle);
    InStream_Seek(original, 1);
    TEST_TRUE(runner,
              Str_Equals(InStream_Get_Filename(original), (Obj*)name_foo),
              "Get_Filename");

    // Clone checks.
    twin = InStream_Clone(original);
    TEST_TRUE(runner,
              Str_Equals(InStream_Get_Filename(twin), (Obj*)name_foo),
              "Clones have same filename");
    TEST_TRUE(runner, InStream_Length(original) == InStream_Length(twin),
              "Clones have same length");
    TEST_TRUE(runner, InStream_Read_U8(original) == InStream_Read_U8(twin),
              "Clones start at same file position");

    // Reopen checks: a 1-byte window at offset 25, named "bar".
    window = InStream_Reopen(original, name_bar, 25, 1);
    TEST_TRUE(runner,
              Str_Equals(InStream_Get_Filename(window), (Obj*)name_bar),
              "Reopened InStreams take new filename");
    TEST_TRUE(runner, InStream_Read_U8(window) == 'z',
              "Reopened stream starts at supplied offset");
    TEST_TRUE(runner, InStream_Length(window) == 1,
              "Reopened stream uses supplied length");
    TEST_TRUE(runner, InStream_Tell(window) == 1,
              "Tell() uses supplied offset for reopened stream");
    InStream_Seek(window, 0);
    TEST_TRUE(runner, InStream_Read_U8(window) == 'z',
              "Seek() uses supplied offset for reopened stream");

    DECREF(window);
    DECREF(twin);
    DECREF(original);
    DECREF(out);
    DECREF(handle);
    DECREF(ram_file);
}
/* Read one posting from `instream` and pack it into a RawPosting carved
 * out of `mem_pool`.
 *
 * The doc id is `last_doc_id` plus the delta stored in the upper bits of
 * the leading C32; when that C32's low bit is set the frequency is 1,
 * otherwise the frequency follows as its own C32. For each of the `freq`
 * occurrences, a raw C64 and one byte are copied into the posting's blob
 * directly after the term text. Once the real size is known, the
 * allocation is resized to fit.
 *
 * `self` is unused.
 */
RawPosting*
RichPost_read_raw(RichPosting *self, InStream *instream, int32_t last_doc_id,
                  CharBuf *term_text, MemoryPool *mem_pool) {
    char *const       term_bytes = (char*)CB_Get_Ptr8(term_text);
    const size_t      term_len   = CB_Get_Size(term_text);
    const uint32_t    doc_code   = InStream_Read_C32(instream);
    const int32_t     doc_id     = last_doc_id + (doc_code >> 1);
    const uint32_t    freq
        = (doc_code & 1) ? 1 : InStream_Read_C32(instream);
    // Reserve the worst-case size up front; trimmed below.
    size_t            alloc_size = MAX_RAW_POSTING_LEN(term_len, freq);
    void *const       memory     = MemPool_Grab(mem_pool, alloc_size);
    RawPosting *const posting
        = RawPost_new(memory, doc_id, freq, term_bytes, term_len);
    char *const       aux_start  = posting->blob + term_len;
    char             *cursor     = aux_start;
    uint32_t          remaining;
    UNUSED_VAR(self);

    // Copy each position (raw C64) followed by its per-position boost byte.
    for (remaining = freq; remaining > 0; remaining--) {
        cursor += InStream_Read_Raw_C64(instream, cursor);
        *((uint8_t*)cursor) = InStream_Read_U8(instream);
        cursor++;
    }

    // Record how much auxiliary data was written, then resize the
    // allocation to the bytes actually used.
    posting->aux_len = cursor - aux_start;
    alloc_size       = cursor - (char*)posting;
    MemPool_Resize(mem_pool, posting, alloc_size);

    return posting;
}
/* Populate `self` from `instream` and return it.
 *
 * Values are consumed in this order: boost (F32), field name (string), a
 * presence byte and optional thawed object for the lower term, the same
 * for the upper term, then one byte each for include_lower and
 * include_upper. The local references to the field and both terms are
 * released after RangeQuery_init() has been called with them.
 */
RangeQuery*
RangeQuery_Deserialize_IMP(RangeQuery *self, InStream *instream) {
    float   boost;
    String *field;
    Obj    *lower_term = NULL;
    Obj    *upper_term = NULL;
    bool    include_lower;
    bool    include_upper;

    // Pull the components off the stream; order matters.
    boost = InStream_Read_F32(instream);
    field = Freezer_read_string(instream);
    if (InStream_Read_U8(instream)) {
        lower_term = THAW(instream);
    }
    if (InStream_Read_U8(instream)) {
        upper_term = THAW(instream);
    }
    include_lower = InStream_Read_U8(instream) != 0;
    include_upper = InStream_Read_U8(instream) != 0;

    // Initialize the query, then apply the boost.
    RangeQuery_init(self, field, lower_term, upper_term,
                    include_lower, include_upper);
    RangeQuery_Set_Boost(self, boost);

    // Drop our references to the temporaries.
    DECREF(upper_term);
    DECREF(lower_term);
    DECREF(field);

    return self;
}
/* Populate a MatchDoc from `instream` and return it.
 *
 * If `self` is NULL a new object is created with VTable_Make_Obj().
 * The stream supplies the doc id (C32), the score (float), and a one-byte
 * flag; when the flag is non-zero a values array follows and is
 * deserialized, otherwise `values` is set to NULL.
 *
 * NOTE(review): any `values` already held by a caller-supplied `self` is
 * overwritten without being released, in both branches; callers are
 * presumably expected to pass a blank object -- confirm.
 */
MatchDoc*
MatchDoc_deserialize(MatchDoc *self, InStream *instream) {
    self = self ? self : (MatchDoc*)VTable_Make_Obj(&MATCHDOC);

    self->doc_id = InStream_Read_C32(instream);
    self->score  = InStream_Read_Float(instream);

    if (InStream_Read_U8(instream)) {
        self->values = VA_deserialize(NULL, instream);
    }
    else {
        // Make the "no values" case explicit rather than relying on
        // whatever the field held before deserialization.
        self->values = NULL;
    }

    return self;
}
/* Populate `self` from `instream` and return it.
 *
 * Stream layout, in read order: boost (F32); field name, deserialized into
 * a freshly made CharBuf; a presence byte plus optional thawed object for
 * the lower term; the same for the upper term; one byte each for
 * include_lower and include_upper. The local references to the field and
 * both terms are released once RangeQuery_init() has been called.
 */
RangeQuery*
RangeQuery_deserialize(RangeQuery *self, InStream *instream) {
    float    boost;
    CharBuf *field;
    Obj     *lower_term = NULL;
    Obj     *upper_term = NULL;
    bool_t   include_lower;
    bool_t   include_upper;

    // Read the components in serialization order.
    boost = InStream_Read_F32(instream);
    field = CB_Deserialize((CharBuf*)VTable_Make_Obj(CHARBUF), instream);
    if (InStream_Read_U8(instream)) {
        lower_term = THAW(instream);
    }
    if (InStream_Read_U8(instream)) {
        upper_term = THAW(instream);
    }
    include_lower = InStream_Read_U8(instream);
    include_upper = InStream_Read_U8(instream);

    // Initialize the query, then apply the boost.
    RangeQuery_init(self, field, lower_term, upper_term,
                    include_lower, include_upper);
    RangeQuery_Set_Boost(self, boost);

    // Release the temporaries.
    DECREF(upper_term);
    DECREF(lower_term);
    DECREF(field);

    return self;
}
/* Fill in `self`'s instance variables from `instream` and return `self`.
 *
 * A leading byte says whether a field name is present: non-zero means a
 * string follows and becomes `field`; zero means `field` is NULL. The
 * text string and the boost (F32) follow in that order.
 */
LeafQuery*
LeafQuery_Deserialize_IMP(LeafQuery *self, InStream *instream) {
    LeafQueryIVARS *const ivars = LeafQuery_IVARS(self);

    // Optional field name, guarded by a presence byte.
    ivars->field = InStream_Read_U8(instream)
                   ? Freezer_read_string(instream)
                   : NULL;

    ivars->text  = Freezer_read_string(instream);
    ivars->boost = InStream_Read_F32(instream);

    return self;
}
/* Decode the next posting record from `instream` into `self`.
 *
 * Updates, in order:
 *   - doc_id: advanced by the delta stored in the upper bits of the
 *     leading C32.
 *   - freq: 1 when that C32's low bit is set, otherwise read as a
 *     separate C32.
 *   - prox / prox_boosts: one entry per occurrence. Positions are stored
 *     as C32 deltas and accumulated here; each boost is a byte mapped
 *     through `norm_decoder`.
 *   - weight: the mean of the per-position boosts.
 *
 * The prox and prox_boosts buffers are grown when `freq` exceeds
 * `prox_cap`, and `prox_cap` is updated to the new size so that later
 * records which fit the existing buffers do not reallocate again.
 *
 * NOTE(review): `weight` is computed as aggregate / freq; a stored freq of
 * 0 would divide by zero. Presumably the index format guarantees
 * freq >= 1 -- confirm.
 */
void
RichPost_read_record(RichPosting *self, InStream *instream) {
    float *const norm_decoder = self->norm_decoder;
    uint32_t     doc_code;
    uint32_t     num_prox = 0;
    uint32_t     position = 0;
    uint32_t    *positions;
    float       *prox_boosts;
    float        aggregate_weight = 0.0;

    // Decode delta doc.
    doc_code = InStream_Read_C32(instream);
    self->doc_id += doc_code >> 1;

    // If the stored num was odd, the freq is 1.
    if (doc_code & 1) {
        self->freq = 1;
    }
    // Otherwise, freq was stored as a C32.
    else {
        self->freq = InStream_Read_C32(instream);
    }

    // Make sure both per-position buffers can hold `freq` entries.
    num_prox = self->freq;
    if (num_prox > self->prox_cap) {
        self->prox
            = (uint32_t*)REALLOCATE(self->prox, num_prox * sizeof(uint32_t));
        self->prox_boosts
            = (float*)REALLOCATE(self->prox_boosts, num_prox * sizeof(float));
        // Remember the new capacity; without this every record whose freq
        // exceeds the stale cap would reallocate both buffers again.
        self->prox_cap = num_prox;
    }
    positions   = self->prox;
    prox_boosts = self->prox_boosts;

    // Read positions, aggregate per-position boost byte into weight.
    while (num_prox--) {
        position += InStream_Read_C32(instream);
        *positions++ = position;
        *prox_boosts = norm_decoder[InStream_Read_U8(instream)];
        aggregate_weight += *prox_boosts;
        prox_boosts++;
    }

    self->weight = aggregate_weight / self->freq;
}
/* Read one posting from `instream` and pack it into a RawPosting carved
 * out of `mem_pool`.
 *
 * The doc id is `last_doc_id` plus the delta held in the upper bits of the
 * leading C32; a set low bit means the frequency is 1, otherwise the
 * frequency follows as its own C32. The auxiliary data written after the
 * term text in the posting's blob is one byte read from the stream
 * followed by `freq` raw C64 values. The allocation is sized for the
 * worst case first and resized once the real length is known.
 *
 * `self` is unused.
 */
RawPosting*
ScorePost_Read_Raw_IMP(ScorePosting *self, InStream *instream,
                       int32_t last_doc_id, String *term_text,
                       MemoryPool *mem_pool) {
    const char *const term_bytes = Str_Get_Ptr8(term_text);
    const size_t      term_len   = Str_Get_Size(term_text);
    const uint32_t    doc_code   = InStream_Read_C32(instream);
    const int32_t     doc_id     = last_doc_id + (doc_code >> 1);
    const uint32_t    freq
        = (doc_code & 1) ? 1 : InStream_Read_C32(instream);
    const size_t      obj_size   = Class_Get_Obj_Alloc_Size(RAWPOSTING);
    // Worst-case size; trimmed to the bytes actually used below.
    size_t            alloc_size
        = MAX_RAW_POSTING_LEN(obj_size, term_len, freq);
    void *const       memory     = MemPool_Grab(mem_pool, alloc_size);
    RawPosting *const posting
        = RawPost_new(memory, doc_id, freq, term_bytes, term_len);
    RawPostingIVARS *const posting_ivars = RawPost_IVARS(posting);
    char *const       aux_start  = posting_ivars->blob + term_len;
    char             *cursor     = aux_start;
    uint32_t          remaining;
    UNUSED_VAR(self);

    // Field_boost: a single byte, copied verbatim.
    *((uint8_t*)cursor) = InStream_Read_U8(instream);
    cursor++;

    // Positions: one raw C64 per occurrence.
    for (remaining = freq; remaining > 0; remaining--) {
        cursor += InStream_Read_Raw_C64(instream, cursor);
    }

    // Record the auxiliary length and resize the allocation to fit.
    posting_ivars->aux_len = cursor - aux_start;
    alloc_size             = cursor - (char*)posting;
    MemPool_Resize(mem_pool, posting, alloc_size);

    return posting;
}
/* Deserialize the contents of `instream` into `obj`, choosing the routine
 * by `obj`'s class, and return the resulting object.
 *
 * The classes are tested in a fixed order with Obj_is_a(), and the first
 * match wins. For BOOLEAN the result is a new reference to CFISH_TRUE or
 * CFISH_FALSE rather than `obj` itself. For every other class the return
 * value is whatever the class-specific routine returns.
 *
 * Throws ERR if `obj` matches none of the known classes.
 */
Obj*
Freezer_deserialize(Obj *obj, InStream *instream) {
    // Core Clownfish value types.
    if (Obj_is_a(obj, STRING)) {
        return (Obj*)Freezer_deserialize_string((String*)obj, instream);
    }
    if (Obj_is_a(obj, BLOB)) {
        return (Obj*)Freezer_deserialize_blob((Blob*)obj, instream);
    }
    if (Obj_is_a(obj, VECTOR)) {
        return (Obj*)Freezer_deserialize_varray((Vector*)obj, instream);
    }
    if (Obj_is_a(obj, HASH)) {
        return (Obj*)Freezer_deserialize_hash((Hash*)obj, instream);
    }
    if (Obj_is_a(obj, INTEGER)) {
        int64_t number = (int64_t)InStream_Read_C64(instream);
        return (Obj*)Int_init((Integer*)obj, number);
    }
    if (Obj_is_a(obj, FLOAT)) {
        double number = InStream_Read_F64(instream);
        return (Obj*)Float_init((Float*)obj, number);
    }
    if (Obj_is_a(obj, BOOLEAN)) {
        bool flag      = InStream_Read_U8(instream) != 0;
        Obj *singleton = flag ? INCREF(CFISH_TRUE) : INCREF(CFISH_FALSE);
        // FIXME: This DECREF is essentially a no-op causing a
        // memory leak.
        DECREF(obj);
        return singleton;
    }

    // Classes that provide their own Deserialize method.
    if (Obj_is_a(obj, QUERY)) {
        return (Obj*)Query_Deserialize((Query*)obj, instream);
    }
    if (Obj_is_a(obj, DOC)) {
        return (Obj*)Doc_Deserialize((Doc*)obj, instream);
    }
    if (Obj_is_a(obj, DOCVECTOR)) {
        return (Obj*)DocVec_Deserialize((DocVector*)obj, instream);
    }
    if (Obj_is_a(obj, TERMVECTOR)) {
        return (Obj*)TV_Deserialize((TermVector*)obj, instream);
    }
    if (Obj_is_a(obj, SIMILARITY)) {
        return (Obj*)Sim_Deserialize((Similarity*)obj, instream);
    }
    if (Obj_is_a(obj, MATCHDOC)) {
        return (Obj*)MatchDoc_Deserialize((MatchDoc*)obj, instream);
    }
    if (Obj_is_a(obj, TOPDOCS)) {
        return (Obj*)TopDocs_Deserialize((TopDocs*)obj, instream);
    }
    if (Obj_is_a(obj, SORTSPEC)) {
        return (Obj*)SortSpec_Deserialize((SortSpec*)obj, instream);
    }
    if (Obj_is_a(obj, SORTRULE)) {
        return (Obj*)SortRule_Deserialize((SortRule*)obj, instream);
    }

    // No known class matched.
    THROW(ERR, "Don't know how to deserialize a %o",
          Obj_get_class_name(obj));
    return obj;
}