/* Build a DocVector for `doc_id` from the highlight data streams.
 *
 * The index stream `ix_in` is addressed at `doc_id * 8` and yields a 64-bit
 * position into the data stream `dat_in`.  At that position the data stream
 * holds a compressed field count followed by that many (field name, field
 * buffer) pairs, each of which is added to the returned DocVector.
 *
 * @param self   The reader supplying `ix_in` and `dat_in`.
 * @param doc_id The document whose vector data should be loaded.
 * @return a new DocVector; the caller owns the reference.
 */
DocVector*
DefHLReader_fetch_doc_vec(DefaultHighlightReader *self, int32_t doc_id) {
    DocVector *doc_vec = DocVec_new();
    int64_t    file_pos;
    uint32_t   num_fields;

    /* Widen before multiplying: `doc_id * 8` evaluated in 32-bit int
     * overflows (undefined behavior) once doc_id exceeds INT32_MAX / 8. */
    InStream_Seek(self->ix_in, (int64_t)doc_id * 8);
    file_pos = InStream_Read_I64(self->ix_in);
    InStream_Seek(self->dat_in, file_pos);

    num_fields = InStream_Read_C32(self->dat_in);
    while (num_fields--) {
        CharBuf *field     = CB_deserialize(NULL, self->dat_in);
        ByteBuf *field_buf = BB_deserialize(NULL, self->dat_in);
        DocVec_Add_Field_Buf(doc_vec, field, field_buf);
        /* Release our local references to the deserialized objects. */
        DECREF(field_buf);
        DECREF(field);
    }

    return doc_vec;
}
/* Build a DocVector for `doc_id` from the highlight data streams.
 *
 * The index stream `ix_in` is addressed at `doc_id * 8` and yields a 64-bit
 * position into the data stream `dat_in`.  At that position the data stream
 * holds a compressed field count followed by that many (field name, field
 * blob) pairs, each of which is added to the returned DocVector.
 *
 * @param self   The reader whose ivars supply `ix_in` and `dat_in`.
 * @param doc_id The document whose vector data should be loaded.
 * @return a new DocVector; the caller owns the reference.
 */
DocVector*
DefHLReader_Fetch_Doc_Vec_IMP(DefaultHighlightReader *self, int32_t doc_id) {
    DefaultHighlightReaderIVARS *const ivars = DefHLReader_IVARS(self);
    InStream *const ix_in  = ivars->ix_in;
    InStream *const dat_in = ivars->dat_in;
    DocVector *doc_vec = DocVec_new();

    // Widen before multiplying: `doc_id * 8` evaluated in 32-bit int
    // overflows (undefined behavior) once doc_id exceeds INT32_MAX / 8.
    InStream_Seek(ix_in, (int64_t)doc_id * 8);
    int64_t file_pos = InStream_Read_I64(ix_in);
    InStream_Seek(dat_in, file_pos);

    uint32_t num_fields = InStream_Read_CU32(dat_in);
    while (num_fields--) {
        String *field     = Freezer_read_string(dat_in);
        Blob   *field_buf = Freezer_read_blob(dat_in);
        DocVec_Add_Field_Buf(doc_vec, field, field_buf);
        // Release our local references to the objects just read.
        DECREF(field_buf);
        DECREF(field);
    }

    return doc_vec;
}
/* Build a DocVector for `doc_id` from the highlight data streams.
 *
 * The index stream `ix_in` is addressed at `doc_id * 8` and yields a 64-bit
 * position into the data stream `dat_in`.  At that position the data stream
 * holds a compressed field count followed by that many (field name, field
 * buffer) pairs, each of which is added to the returned DocVector.
 *
 * @param self   The reader supplying `ix_in` and `dat_in`.
 * @param doc_id The document whose vector data should be loaded.
 * @return a new DocVector; the caller owns the reference.
 */
DocVector*
DefHLReader_fetch_doc_vec(DefaultHighlightReader *self, int32_t doc_id) {
    InStream *const ix_in  = self->ix_in;
    InStream *const dat_in = self->dat_in;
    DocVector *doc_vec = DocVec_new();

    /* Widen before multiplying: `doc_id * 8` evaluated in 32-bit int
     * overflows (undefined behavior) once doc_id exceeds INT32_MAX / 8. */
    InStream_Seek(ix_in, (int64_t)doc_id * 8);
    int64_t file_pos = InStream_Read_I64(ix_in);
    InStream_Seek(dat_in, file_pos);

    uint32_t num_fields = InStream_Read_C32(dat_in);
    while (num_fields--) {
        /* Each Deserialize call is handed a freshly made blank object. */
        CharBuf *field
            = CB_Deserialize((CharBuf*)VTable_Make_Obj(CHARBUF), dat_in);
        ByteBuf *field_buf
            = BB_Deserialize((ByteBuf*)VTable_Make_Obj(BYTEBUF), dat_in);
        DocVec_Add_Field_Buf(doc_vec, field, field_buf);
        /* Release our local references to the deserialized objects. */
        DECREF(field_buf);
        DECREF(field);
    }

    return doc_vec;
}
/* Load the 64-bit integer value stored for ordinal `ord` into `blank`.
 *
 * @param self  The sort cache supplying `dat_in`, `null_ord` and `field`.
 * @param ord   Ordinal of the value to fetch.
 * @param blank An Integer64 (verified via CERTIFY) that receives the value.
 * @return NULL when `ord` equals the cache's `null_ord`; otherwise `blank`,
 *         updated in place.  Throws ERR for any other negative ordinal.
 */
Obj*
I64SortCache_value(Int64SortCache *self, int32_t ord, Obj *blank) {
    if (ord == self->null_ord) {
        return NULL;
    }
    else if (ord < 0) {
        THROW(ERR, "Ordinal less than 0 for %o: %i32", self->field, ord);
    }
    else {
        Integer64 *int_blank = (Integer64*)CERTIFY(blank, INTEGER64);
        /* Compute the byte offset in 64-bit arithmetic.  `ord * sizeof(...)`
         * is evaluated in size_t, which wraps on platforms where size_t is
         * 32 bits wide once ord reaches 2^29. */
        InStream_Seek(self->dat_in, (int64_t)ord * (int64_t)sizeof(int64_t));
        Int64_Set_Value(int_blank, InStream_Read_I64(self->dat_in));
    }
    return blank;
}
/* Load the 32-bit float value stored for ordinal `ord` into `blank`.
 *
 * @param self  The sort cache supplying `dat_in`, `null_ord` and `field`.
 * @param ord   Ordinal of the value to fetch.
 * @param blank A Float32 (verified via CERTIFY) that receives the value.
 * @return NULL when `ord` equals the cache's `null_ord`; otherwise `blank`,
 *         updated in place.  Throws ERR for any other negative ordinal.
 */
Obj*
F32SortCache_value(Float32SortCache *self, int32_t ord, Obj *blank) {
    if (ord == self->null_ord) {
        return NULL;
    }
    else if (ord < 0) {
        THROW(ERR, "Ordinal less than 0 for %o: %i32", self->field, ord);
    }
    else {
        Float32 *num_blank = (Float32*)CERTIFY(blank, FLOAT32);
        /* Compute the byte offset in 64-bit arithmetic.  `ord * sizeof(...)`
         * is evaluated in size_t, which wraps on platforms where size_t is
         * 32 bits wide once ord reaches 2^30. */
        InStream_Seek(self->dat_in, (int64_t)ord * (int64_t)sizeof(float));
        Float32_Set_Value(num_blank, InStream_Read_F32(self->dat_in));
    }
    return blank;
}
// Position the index stream at the entry selected by `ivars->tick`, read its
// key frame into the term stepper, and copy the entry's numbers (doc freq,
// postings/skip/lexicon file positions) into the shared TermInfo.
static void
S_read_entry(LexIndex *self) {
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    InStream *instream  = ivars->ix_in;
    TermInfo *const term_info = ivars->tinfo;

    // Offsets are stored big-endian; decode the one for the current tick.
    const int64_t entry_pos
        = (int64_t)NumUtil_decode_bigend_u64(ivars->offsets + ivars->tick);
    InStream_Seek(instream, entry_pos);
    TermStepper_Read_Key_Frame(ivars->term_stepper, instream);

    // The remaining values must be consumed in exactly this order.
    int32_t doc_freq = InStream_Read_C32(instream);
    TInfo_Set_Doc_Freq(term_info, doc_freq);
    TInfo_Set_Post_FilePos(term_info, InStream_Read_C64(instream));

    // A skip position is only read when doc_freq reaches skip_interval;
    // otherwise it is recorded as 0.
    int64_t skip_filepos = 0;
    if (doc_freq >= ivars->skip_interval) {
        skip_filepos = InStream_Read_C64(instream);
    }
    TInfo_Set_Skip_FilePos(term_info, skip_filepos);

    TInfo_Set_Lex_FilePos(term_info, InStream_Read_C64(instream));
}
/* Create a second InStream over the same file handle, restricted to the
 * window [offset, offset + len).
 *
 * @param self     An open InStream; throws ERR if its file handle is gone.
 * @param filename Optional name for the new stream; when NULL the name set
 *                 up by InStream_do_open is left untouched.
 * @param offset   Start of the window within the underlying file handle.
 * @param len      Length of the window; throws ERR if offset + len exceeds
 *                 the file handle's length.
 * @return the new stream, positioned at 0.
 */
InStream*
InStream_reopen(InStream *self, const CharBuf *filename, int64_t offset,
                int64_t len) {
    if (self->file_handle == NULL) {
        THROW(ERR, "Can't Reopen() closed InStream %o", self->filename);
    }
    if (FH_Length(self->file_handle) < offset + len) {
        THROW(ERR, "Offset + length too large (%i64 + %i64 > %i64)",
              offset, len, FH_Length(self->file_handle));
    }

    /* Instantiate via self's vtable, then open on the shared file handle. */
    InStream *reopened = (InStream*)VTable_Make_Obj(self->vtable);
    InStream_do_open(reopened, (Obj*)self->file_handle);
    if (filename) {
        CB_Mimic(reopened->filename, (Obj*)filename);
    }

    /* Narrow the view and rewind to its start. */
    reopened->len    = len;
    reopened->offset = offset;
    InStream_Seek(reopened, 0);

    return reopened;
}
/* Create a second InStream over the same file handle, restricted to the
 * window [offset, offset + len).
 *
 * @param self     An open InStream; throws ERR if its file handle is gone.
 * @param filename Optional name for the new stream; when NULL the name set
 *                 up by InStream_do_open is left untouched.
 * @param offset   Start of the window within the underlying file handle.
 * @param len      Length of the window; throws ERR if offset + len exceeds
 *                 the file handle's length.
 * @return the new stream, positioned at 0.
 */
InStream*
InStream_reopen(InStream *self, const CharBuf *filename, int64_t offset,
                int64_t len) {
    InStreamIVARS *const ivars = InStream_IVARS(self);

    if (ivars->file_handle == NULL) {
        THROW(ERR, "Can't Reopen() closed InStream %o", ivars->filename);
    }
    if (FH_Length(ivars->file_handle) < offset + len) {
        THROW(ERR, "Offset + length too large (%i64 + %i64 > %i64)",
              offset, len, FH_Length(ivars->file_handle));
    }

    /* Instantiate via self's vtable, then open on the shared file handle. */
    VTable   *self_vtable = InStream_Get_VTable(self);
    InStream *reopened    = (InStream*)VTable_Make_Obj(self_vtable);
    InStreamIVARS *const reopened_ivars = InStream_IVARS(reopened);
    InStream_do_open(reopened, (Obj*)ivars->file_handle);
    if (filename) {
        CB_Mimic(reopened_ivars->filename, (Obj*)filename);
    }

    /* Narrow the view and rewind to its start. */
    reopened_ivars->len    = len;
    reopened_ivars->offset = offset;
    InStream_Seek(reopened, 0);

    return reopened;
}
// Read the stored fields of document `doc_id` and return them as a HitDoc.
//
// The index stream is addressed at `doc_id * 8` and yields the position of
// the document's record in the data stream.  The record is a compressed
// field count followed, per field, by a length-prefixed field name and a
// value whose encoding is chosen by the field's type in the schema.
//
// Returns a new HitDoc built from the decoded fields with a score of 0.0.
// Throws ERR if a field's primitive type id is not recognized.
HitDoc*
DefDocReader_Fetch_Doc_IMP(DefaultDocReader *self, int32_t doc_id) {
    DefaultDocReaderIVARS *const ivars = DefDocReader_IVARS(self);
    Schema   *const schema = ivars->schema;
    InStream *const dat_in = ivars->dat_in;
    InStream *const ix_in  = ivars->ix_in;
    Hash     *const fields = Hash_new(1);

    // Scratch buffer for field names; grown on demand and reused for
    // every field.
    uint32_t name_cap = 31;
    char    *name_buf = (char*)MALLOCATE(name_cap + 1);

    // Look up where this doc's record starts, then jump there.
    InStream_Seek(ix_in, (int64_t)doc_id * 8);
    int64_t record_start = InStream_Read_U64(ix_in);
    InStream_Seek(dat_in, record_start);

    // Decode the record one field at a time.
    for (uint32_t remaining = InStream_Read_C32(dat_in);
         remaining > 0;
         remaining--
        ) {
        // Field name: length prefix, then raw bytes.
        uint32_t name_len = InStream_Read_C32(dat_in);
        if (name_len > name_cap) {
            name_cap = name_len;
            name_buf = (char*)REALLOCATE(name_buf, name_cap + 1);
        }
        InStream_Read_Bytes(dat_in, name_buf, name_len);

        // Ask the schema how this field's value is encoded.
        StackString *name_str = SSTR_WRAP_UTF8(name_buf, name_len);
        FieldType *type = Schema_Fetch_Type(schema, (String*)name_str);

        // Decode the value according to the field's primitive type.
        Obj *value;
        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                    uint32_t text_len = InStream_Read_C32(dat_in);
                    char *text = (char*)MALLOCATE(text_len + 1);
                    InStream_Read_Bytes(dat_in, text, text_len);
                    text[text_len] = '\0';
                    // The new string takes over the buffer.
                    value = (Obj*)Str_new_steal_utf8(text, text_len);
                    break;
                }
            case FType_BLOB: {
                    uint32_t blob_len = InStream_Read_C32(dat_in);
                    char *bytes = (char*)MALLOCATE(blob_len);
                    InStream_Read_Bytes(dat_in, bytes, blob_len);
                    // The new byte buffer takes over the allocation.
                    value = (Obj*)BB_new_steal_bytes(bytes, blob_len,
                                                     blob_len);
                    break;
                }
            case FType_FLOAT32:
                value = (Obj*)Float32_new(InStream_Read_F32(dat_in));
                break;
            case FType_FLOAT64:
                value = (Obj*)Float64_new(InStream_Read_F64(dat_in));
                break;
            case FType_INT32:
                value = (Obj*)Int32_new((int32_t)InStream_Read_C32(dat_in));
                break;
            case FType_INT64:
                value = (Obj*)Int64_new((int64_t)InStream_Read_C64(dat_in));
                break;
            default:
                value = NULL;
                THROW(ERR, "Unrecognized type: %o", type);
        }

        // File the decoded value under its field name.
        Hash_Store_Utf8(fields, name_buf, name_len, value);
    }
    FREEMEM(name_buf);

    HitDoc *hit_doc = HitDoc_new(fields, doc_id, 0.0);
    DECREF(fields);

    return hit_doc;
}
// Binary-search the index entries for `target` and load the entry found.
//
// With a NULL target or an empty index, `tick` is reset to 0 and no entry is
// read.  Otherwise `tick` becomes the index of the exact match if there is
// one; failing that, the final upper bound of the search (the last entry
// that compared below the target), or 0 when the target sorts before the
// first entry.  The entry at `tick` is then read via S_read_entry().
//
// Throws ERR if `target` is not a STRING.
void
LexIndex_Seek_IMP(LexIndex *self, Obj *target) {
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);

    // Nothing to look for, or nothing to look in.
    if (target == NULL || ivars->size == 0) {
        ivars->tick = 0;
        return;
    }

    if (!Obj_is_a(target, STRING)) {
        THROW(ERR, "Target is a %o, and not comparable to a %o",
              Obj_get_class_name(target), Class_Get_Name(STRING));
    }
    // TODO: compare the target's class against the class of the first
    // stored term instead of hard-coding STRING.

    TermStepper *stepper    = ivars->term_stepper;
    InStream    *instream   = ivars->ix_in;
    FieldType   *field_type = ivars->field_type;

    int32_t low   = 0;
    int32_t high  = ivars->size - 1;
    int32_t match = -100;  // Sentinel: stays -100 unless an exact hit occurs.

    // Divide and conquer.
    while (low <= high) {
        const int32_t probe = low + ((high - low) / 2);
        const int64_t entry_pos
            = (int64_t)NumUtil_decode_bigend_u64(ivars->offsets + probe);
        InStream_Seek(instream, entry_pos);
        TermStepper_Read_Key_Frame(stepper, instream);

        // `probe` always lies within [0, size - 1], so the stepper is
        // expected to hold a value here; no NULL check is performed.
        Obj *candidate = TermStepper_Get_Value(stepper);
        int32_t cmp = FType_Compare_Values(field_type, target, candidate);
        if (cmp == 0) {
            match = probe;
            break;
        }
        if (cmp < 0) {
            high = probe - 1;
        }
        else {
            low = probe + 1;
        }
    }

    // Record the index of the entry we've seeked to, then read the entry.
    if (high == -1) {
        // Target sorts before the first entry.
        ivars->tick = 0;
    }
    else if (match == -100) {
        // No exact hit: settle on the final upper bound.
        ivars->tick = high;
    }
    else {
        ivars->tick = match;
    }
    S_read_entry(self);
}