/* Write `blob` to `outstream` as a compressed 64-bit length followed by the
 * blob's raw bytes.
 *
 * Throws ERR if the blob's size exceeds INT32_MAX.
 */
void
Freezer_serialize_blob(Blob *blob, OutStream *outstream) {
    const size_t num_bytes = Blob_Get_Size(blob);

    // Refuse anything past the 2GB mark before touching the stream.
    if (num_bytes > INT32_MAX) {
        THROW(ERR, "Can't serialize blob above 2GB: %u64",
              (uint64_t)num_bytes);
    }

    // Length prefix first, then the payload itself.
    OutStream_Write_CU64(outstream, num_bytes);
    const char *payload = Blob_Get_Buf(blob);
    OutStream_Write_Bytes(outstream, payload, num_bytes);
}
/* Hand `item` to the parent class's Feed(), add the blob's size to the
 * running memory total, and call Flush() once that total reaches the
 * configured threshold.
 */
void
BlobSortEx_Feed_IMP(BlobSortEx *self, Obj *item) {
    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);

    // Delegate to the superclass implementation before doing any accounting.
    BlobSortEx_Feed_t parent_feed
        = SUPER_METHOD_PTR(BLOBSORTEX, LUCY_BlobSortEx_Feed);
    parent_feed(self, item);

    // Track how many payload bytes have been fed so far.
    Blob *fed_blob = (Blob*)CERTIFY(item, BLOB);
    ivars->mem_consumed += Blob_Get_Size(fed_blob);

    // Still under the threshold: nothing more to do.
    if (ivars->mem_consumed < ivars->mem_thresh) {
        return;
    }
    BlobSortEx_Flush(self);
}
/* Record `value` for document `doc_id`.
 *
 * Charges the memory counter with the fixed per-entry cost, plus (for text
 * and blob fields only) the value's size + 1 rounded by
 * SI_increase_to_word_multiple().  A clone of the value is then wrapped in
 * an SFWriterElem and fed to the sort pool, and the entry count is bumped.
 */
void
SortFieldWriter_Add_IMP(SortFieldWriter *self, int32_t doc_id, Obj *value) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    Counter *mem_counter = ivars->counter;

    // Fixed overhead charged for every entry.
    Counter_Add(mem_counter, ivars->mem_per_entry);

    // Variable-size payloads are charged separately.
    const int is_text = (ivars->prim_id == FType_TEXT);
    const int is_blob = !is_text && (ivars->prim_id == FType_BLOB);
    if (is_text || is_blob) {
        int64_t payload = is_text
                          ? Str_Get_Size((String*)value) + 1
                          : Blob_Get_Size((Blob*)value) + 1;
        int64_t padded = SI_increase_to_word_multiple(payload);
        Counter_Add(mem_counter, padded);
    }

    SFWriterElem *entry = S_SFWriterElem_create(Obj_Clone(value), doc_id);
    SortFieldWriter_Feed(self, (Obj*)entry);
    ivars->count += 1;
}
/* Reload the in-memory buffer from the `external` vector.
 *
 * The buffer must be fully drained on entry; otherwise ERR is thrown.
 * Elements are pulled from `external` (starting at `external_tick`) until
 * either the vector is exhausted or the accumulated blob sizes reach
 * `mem_thresh`, at which point the running total is reset to zero.
 *
 * Returns the number of items now in the buffer.
 */
uint32_t
BlobSortEx_Refill_IMP(BlobSortEx *self) {
    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);

    // Refuse to refill while unread items remain.
    if (ivars->buf_max - ivars->buf_tick > 0) {
        THROW(ERR, "Refill called but buffer contains %u32 items",
              ivars->buf_max - ivars->buf_tick);
    }

    // Start over from an empty buffer.
    ivars->buf_tick = 0;
    ivars->buf_max  = 0;

    for (;;) {
        // Memory budget used up: reset the tally for the next round.
        if (ivars->mem_consumed >= ivars->mem_thresh) {
            ivars->mem_consumed = 0;
            break;
        }

        // Nothing left to pull in.
        if (ivars->external_tick >= Vec_Get_Size(ivars->external)) {
            break;
        }

        Blob *next = (Blob*)Vec_Fetch(ivars->external, ivars->external_tick);
        ivars->external_tick += 1;

        // Only the payload is counted, not the Blob object itself; the
        // undercount is tolerated.
        ivars->mem_consumed += Blob_Get_Size(next);

        // Make room if the buffer is full.
        if (ivars->buf_cap == ivars->buf_max) {
            BlobSortEx_Grow_Buffer(self,
                                   Memory_oversize(ivars->buf_max + 1,
                                                   sizeof(Obj*)));
        }

        ivars->buffer[ivars->buf_max] = INCREF(next);
        ivars->buf_max += 1;
    }

    return ivars->buf_max;
}
/* Decode the position data stored in `tv_buf` into a new TermVector for
 * `term_text` within `field`.
 *
 * The buffer is read as one compressed uint32 count followed by that many
 * (position, start, end) triples of compressed int32 values.  An empty
 * buffer yields a TermVector with zero positions.
 *
 * Throws ERR ("Bad encoding of posdata") when the declared count cannot be
 * satisfied by the bytes available, when the input runs out before all
 * triples are decoded, or when decoding does not end exactly at the end of
 * the buffer.  The decode arrays are released before throwing.
 *
 * Returns a new TermVector.
 */
static TermVector*
S_extract_tv_from_tv_buf(String *field, String *term_text, Blob *tv_buf) {
    TermVector *retval      = NULL;
    const char *posdata     = Blob_Get_Buf(tv_buf);
    const char *posdata_end = posdata + Blob_Get_Size(tv_buf);
    int32_t    *positions   = NULL;
    int32_t    *starts      = NULL;
    int32_t    *ends        = NULL;
    uint32_t    num_pos     = 0;
    uint32_t    num_decoded = 0;

    if (posdata != posdata_end) {
        num_pos = NumUtil_decode_cu32(&posdata);

        // Each encoded value occupies at least one byte, so a well-formed
        // buffer holds at most remaining / 3 triples.  Rejecting larger
        // counts up front keeps the allocations below bounded by the size
        // of the input instead of by an untrusted 32-bit number.
        size_t remaining = posdata < posdata_end
                           ? (size_t)(posdata_end - posdata)
                           : 0;
        if (num_pos > remaining / 3) {
            THROW(ERR, "Bad encoding of posdata");
        }

        positions = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
        starts    = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
        ends      = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
    }

    // Expand CI32s, stopping as soon as the input is exhausted so that a
    // truncated buffer is not read further than one triple past its end.
    for (; num_decoded < num_pos; num_decoded++) {
        if (posdata >= posdata_end) {
            break;
        }
        positions[num_decoded] = NumUtil_decode_ci32(&posdata);
        starts[num_decoded]    = NumUtil_decode_ci32(&posdata);
        ends[num_decoded]      = NumUtil_decode_ci32(&posdata);
    }

    // Hand the raw arrays to I32Array wrappers (the `_steal` constructor
    // takes them over) so that a single DECREF releases them on both the
    // success path and the error path.
    I32Array *posits_map = I32Arr_new_steal(positions, num_pos);
    I32Array *starts_map = I32Arr_new_steal(starts, num_pos);
    I32Array *ends_map   = I32Arr_new_steal(ends, num_pos);

    const int well_formed
        = (num_decoded == num_pos) && (posdata == posdata_end);
    if (well_formed) {
        retval = TV_new(field, term_text, posits_map, starts_map, ends_map);
    }

    DECREF(posits_map);
    DECREF(starts_map);
    DECREF(ends_map);

    if (!well_formed) {
        THROW(ERR, "Bad encoding of posdata");
    }

    return retval;
}
/* Write `blob` to `outstream` as a compressed 32-bit length followed by the
 * blob's raw bytes.
 *
 * Throws ERR if the blob's size exceeds INT32_MAX.  Without this check the
 * size_t length would be silently narrowed to 32 bits for the prefix while
 * the full payload was still written, leaving a length that does not match
 * the bytes that follow it.
 */
void
Freezer_serialize_blob(Blob *blob, OutStream *outstream) {
    size_t size = Blob_Get_Size(blob);

    if (size > INT32_MAX) {
        THROW(ERR, "Can't serialize blob above 2GB: %u64", (uint64_t)size);
    }

    // The guard above makes this narrowing cast lossless.
    OutStream_Write_C32(outstream, (uint32_t)size);
    OutStream_Write_Bytes(outstream, Blob_Get_Buf(blob), size);
}
/* Append the stored fields of one inverted document to the data stream and
 * record where that document's record begins in the index stream.
 *
 * The record written to the data stream is: a compressed count of stored
 * fields, then for each stored field its serialized name followed by the
 * value encoded according to the field's primitive type.  Text and blob
 * values are written as a compressed 32-bit length plus raw bytes.
 *
 * Throws ERR when `doc_id` does not match the next slot in the index
 * stream (current index position / 8), when a text or blob value exceeds
 * INT32_MAX bytes, or when a field has an unrecognized primitive type.
 */
void
DocWriter_Add_Inverted_Doc_IMP(DocWriter *self, Inverter *inverter,
                               int32_t doc_id) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    OutStream *dat_out = S_lazy_init(self);
    OutStream *ix_out  = ivars->ix_out;
    const int64_t record_start = OutStream_Tell(dat_out);
    const int64_t expected     = OutStream_Tell(ix_out) / 8;

    // Verify doc id.
    if (expected != doc_id) {
        THROW(ERR, "Expected doc id %i64 but got %i32", expected, doc_id);
    }

    // First pass: count the stored fields and write that count.
    uint32_t stored_count = 0;
    for (Inverter_Iterate(inverter); Inverter_Next(inverter);) {
        if (FType_Stored(Inverter_Get_Type(inverter))) {
            stored_count += 1;
        }
    }
    OutStream_Write_CU32(dat_out, stored_count);

    // Second pass: write each stored field's name and value.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);

        // Only store fields marked as "stored".
        if (!FType_Stored(type)) {
            continue;
        }

        String *field = Inverter_Get_Field_Name(inverter);
        Obj    *value = Inverter_Get_Value(inverter);
        Freezer_serialize_string(field, dat_out);

        // Text and blob values share the same length-prefixed layout, so
        // their cases only pick up the bytes; the write happens below.
        const char *bytes      = NULL;
        size_t      num_bytes  = 0;
        int         has_bytes  = 0;

        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT:
                bytes     = Str_Get_Ptr8((String*)value);
                num_bytes = Str_Get_Size((String*)value);
                has_bytes = 1;
                break;
            case FType_BLOB:
                bytes     = Blob_Get_Buf((Blob*)value);
                num_bytes = Blob_Get_Size((Blob*)value);
                has_bytes = 1;
                break;
            case FType_INT32:
                OutStream_Write_CI32(dat_out,
                                     (int32_t)Int_Get_Value((Integer*)value));
                break;
            case FType_INT64:
                OutStream_Write_CI64(dat_out, Int_Get_Value((Integer*)value));
                break;
            case FType_FLOAT32:
                OutStream_Write_F32(dat_out,
                                    (float)Float_Get_Value((Float*)value));
                break;
            case FType_FLOAT64:
                OutStream_Write_F64(dat_out, Float_Get_Value((Float*)value));
                break;
            default:
                THROW(ERR, "Unrecognized type: %o", type);
        }

        if (has_bytes) {
            if (num_bytes > INT32_MAX) {
                THROW(ERR, "Field %o over 2GB: %u64", field,
                      (uint64_t)num_bytes);
            }
            OutStream_Write_CU32(dat_out, (uint32_t)num_bytes);
            OutStream_Write_Bytes(dat_out, bytes, num_bytes);
        }
    }

    // Write file pointer.
    OutStream_Write_I64(ix_out, record_start);
}
/* Write a single sort-cache value of primitive type `prim_id`.
 *
 * Text and blob: the current data-stream position relative to `dat_start`
 * is written to `ix_out`; if `val` is non-NULL its bytes are then appended
 * to `dat_out`.
 *
 * Numeric types: the value is written to `dat_out` with the matching
 * OutStream_Write_* call; a NULL `val` is written as zero.
 *
 * Throws ERR for an unrecognized primitive id.
 */
static void
S_write_val(Obj *val, int8_t prim_id, OutStream *ix_out, OutStream *dat_out,
            int64_t dat_start) {
    switch (prim_id & FType_PRIMITIVE_ID_MASK) {
        case FType_TEXT: {
            int64_t offset = OutStream_Tell(dat_out) - dat_start;
            OutStream_Write_I64(ix_out, offset);
            if (val) {
                String *text = (String*)val;
                OutStream_Write_Bytes(dat_out, Str_Get_Ptr8(text),
                                      Str_Get_Size(text));
            }
            break;
        }
        case FType_BLOB: {
            int64_t offset = OutStream_Tell(dat_out) - dat_start;
            OutStream_Write_I64(ix_out, offset);
            if (val) {
                Blob *bytes = (Blob*)val;
                OutStream_Write_Bytes(dat_out, Blob_Get_Buf(bytes),
                                      Blob_Get_Size(bytes));
            }
            break;
        }
        case FType_INT32: {
            int32_t number = val ? (int32_t)Int_Get_Value((Integer*)val) : 0;
            OutStream_Write_I32(dat_out, number);
            break;
        }
        case FType_INT64: {
            int64_t number = val ? Int_Get_Value((Integer*)val) : 0;
            OutStream_Write_I64(dat_out, number);
            break;
        }
        case FType_FLOAT32: {
            float number = val ? (float)Float_Get_Value((Float*)val) : 0.0f;
            OutStream_Write_F32(dat_out, number);
            break;
        }
        case FType_FLOAT64: {
            double number = val ? Float_Get_Value((Float*)val) : 0.0;
            OutStream_Write_F64(dat_out, number);
            break;
        }
        default:
            THROW(ERR, "Unrecognized primitive id: %i32", (int32_t)prim_id);
    }
}