/***********************************************************************************************************************************
Execute main function in Perl

Boots the embedded Perl interpreter, evaluates the generated main script, and
converts the Perl-side result/error variables back into a C result code or a
thrown C error.  Returns the integer result code on success.
***********************************************************************************************************************************/
int
perlExec(void)
{
    FUNCTION_LOG_VOID(logLevelDebug);

    // Initialize Perl
    perlInit();

    // Run perl main function
    perlEval(perlMain());

    // Return result code -- read the globals that the Perl side sets: iResult (exit code), bErrorC (was the error originally a C
    // error passed through Perl?), strMessage (error text).
    int code = (int)SvIV(get_sv("iResult", 0));
    bool errorC = (int)SvIV(get_sv("bErrorC", 0));
    char *message = SvPV_nolen(get_sv("strMessage", 0));            // {uncovered - internal Perl macro branch}

    if (code >= errorTypeCode(&AssertError))                        // {uncovered - success tested in integration}
    {
        // If the error originated in C, rethrow it as-is; otherwise raise a new error with the Perl message (or a generic
        // embed-error message when Perl supplied none).
        if (errorC)                                                 // {+uncovered}
            RETHROW();                                              // {+uncovered}
        else
            THROW_CODE(code, strlen(message) == 0 ? PERL_EMBED_ERROR : message); // {+uncovered}
    }

    FUNCTION_LOG_RETURN(INT, code);                                 // {+uncovered}
}
// Lazily open the highlight index and data outstreams ("highlight.ix" and
// "highlight.dat") the first time they are needed, then return dat_out.
// Rethrows the pending Err if either file cannot be opened.
static OutStream*
S_lazy_init(HighlightWriter *self) {
    if (!self->dat_out) {
        Segment *segment  = self->segment;
        Folder  *folder   = self->folder;
        CharBuf *seg_name = Seg_Get_Name(segment);

        // Open outstreams.  Each filename CharBuf is released immediately
        // after the open attempt; on failure the pending Err is rethrown.
        {
            CharBuf *ix_file = CB_newf("%o/highlight.ix", seg_name);
            self->ix_out = Folder_Open_Out(folder, ix_file);
            DECREF(ix_file);
            if (!self->ix_out) { RETHROW(INCREF(Err_get_error())); }
        }
        {
            CharBuf *dat_file = CB_newf("%o/highlight.dat", seg_name);
            self->dat_out = Folder_Open_Out(folder, dat_file);
            DECREF(dat_file);
            if (!self->dat_out) { RETHROW(INCREF(Err_get_error())); }
        }

        // Go past invalid doc 0.  (Doc numbering starts at 1, so a dummy
        // 8-byte slot keeps the ix file aligned with doc ids.)
        OutStream_Write_I64(self->ix_out, 0);
    }
    return self->dat_out;
}
// Begin writing lexicon data for one field: open the per-field .dat/.ix/.ixix
// outstreams, reset the entry counters, and set up term steppers for the
// field's type.  Rethrows the pending Err if any stream fails to open.
void
LexWriter_start_field(LexiconWriter *self, int32_t field_num) {
    Segment   *const segment  = LexWriter_Get_Segment(self);
    Folder    *const folder   = LexWriter_Get_Folder(self);
    Schema    *const schema   = LexWriter_Get_Schema(self);
    CharBuf   *const seg_name = Seg_Get_Name(segment);
    CharBuf   *const field    = Seg_Field_Name(segment, field_num);
    FieldType *const type     = Schema_Fetch_Type(schema, field);

    // Open outstreams.  Filenames are keyed by field number so each field
    // gets its own lexicon file set.
    CB_setf(self->dat_file, "%o/lexicon-%i32.dat", seg_name, field_num);
    CB_setf(self->ix_file, "%o/lexicon-%i32.ix", seg_name, field_num);
    CB_setf(self->ixix_file, "%o/lexicon-%i32.ixix", seg_name, field_num);
    self->dat_out = Folder_Open_Out(folder, self->dat_file);
    if (!self->dat_out) { RETHROW(INCREF(Err_get_error())); }
    self->ix_out = Folder_Open_Out(folder, self->ix_file);
    if (!self->ix_out) { RETHROW(INCREF(Err_get_error())); }
    self->ixix_out = Folder_Open_Out(folder, self->ixix_file);
    if (!self->ixix_out) { RETHROW(INCREF(Err_get_error())); }

    // Initialize count and ix_count, term stepper and term info stepper.
    self->count = 0;
    self->ix_count = 0;
    self->term_stepper = FType_Make_Term_Stepper(type);
    TermStepper_Reset(self->tinfo_stepper);
}
// Lazily open the highlight outstreams ("highlight.ix" / "highlight.dat")
// on first use and return dat_out.  IVARS-based variant: all state lives in
// the HighlightWriterIVARS struct.  Rethrows the pending Err on open failure.
static OutStream*
S_lazy_init(HighlightWriter *self) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);
    if (!ivars->dat_out) {
        Segment *segment  = ivars->segment;
        Folder  *folder   = ivars->folder;
        String  *seg_name = Seg_Get_Name(segment);

        // Open outstreams.  Each filename String is dropped right after the
        // open attempt, before the failure check.
        String *ix_file = Str_newf("%o/highlight.ix", seg_name);
        ivars->ix_out = Folder_Open_Out(folder, ix_file);
        DECREF(ix_file);
        if (!ivars->ix_out) { RETHROW(INCREF(Err_get_error())); }
        String *dat_file = Str_newf("%o/highlight.dat", seg_name);
        ivars->dat_out = Folder_Open_Out(folder, dat_file);
        DECREF(dat_file);
        if (!ivars->dat_out) { RETHROW(INCREF(Err_get_error())); }

        // Go past invalid doc 0.  (Doc ids start at 1; the dummy slot keeps
        // the ix file offsets aligned with doc numbers.)
        OutStream_Write_I64(ivars->ix_out, 0);
    }
    return ivars->dat_out;
}
/**
 * Load and validate a sparkey hash header from the file at `filename`.
 *
 * Checks the magic number, major version, and minor version, then delegates
 * field loading to the version-specific loader.  The FILE handle is closed on
 * every exit path.  (The original implementation used RETHROW() around the
 * fread_little_endian32() calls, which returned on error WITHOUT closing fp
 * and leaked the handle on short or failed reads.)
 *
 * @param header   out-parameter; populated on success (may be partially
 *                 written on failure).
 * @param filename path of the hash file to read.
 * @return SPARKEY_SUCCESS, an errno-derived open failure code, a
 *         magic/version mismatch code, or the loader's return code.
 */
sparkey_returncode sparkey_load_hashheader(sparkey_hashheader *header, const char *filename) {
  // "rb": the header is binary data; the 'b' is a no-op on POSIX but matters
  // on platforms that distinguish text mode.
  FILE *fp = fopen(filename, "rb");
  if (fp == NULL) {
    return sparkey_open_returncode(errno);
  }

  sparkey_returncode rc;
  uint32_t tmp;

  rc = fread_little_endian32(fp, &tmp);
  if (rc != SPARKEY_SUCCESS) {
    fclose(fp);
    return rc;
  }
  if (tmp != HASH_MAGIC_NUMBER) {
    fclose(fp);
    return SPARKEY_WRONG_HASH_MAGIC_NUMBER;
  }

  rc = fread_little_endian32(fp, &header->major_version);
  if (rc != SPARKEY_SUCCESS) {
    fclose(fp);
    return rc;
  }
  if (header->major_version != HASH_MAJOR_VERSION) {
    fclose(fp);
    return SPARKEY_WRONG_HASH_MAJOR_VERSION;
  }

  rc = fread_little_endian32(fp, &header->minor_version);
  if (rc != SPARKEY_SUCCESS) {
    fclose(fp);
    return rc;
  }
  if (header->minor_version > HASH_MINOR_VERSION) {
    fclose(fp);
    return SPARKEY_UNSUPPORTED_HASH_MINOR_VERSION;
  }

  // Dispatch to the loader for this minor version; a NULL slot means the
  // loaders table has no handler for it.
  int version = header->minor_version;
  loader l = loaders[version];
  if (l == NULL) {
    fclose(fp);
    return SPARKEY_INTERNAL_ERROR;
  }
  sparkey_returncode x = (*l)(header, fp);
  fclose(fp);
  return x;
}
// Flip the posting pool from write mode to read mode: open the shared lexicon
// and posting temp instreams (when runs exist), convert any leftover buffered
// items into one final run, and hand each run its slice of the memory budget.
void
PostPool_Flip_IMP(PostingPool *self) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    uint32_t num_runs = VA_Get_Size(ivars->runs);
    // Split the memory threshold evenly across runs (whole budget if none).
    uint32_t sub_thresh = num_runs > 0 ? ivars->mem_thresh / num_runs : ivars->mem_thresh;

    if (num_runs) {
        Folder *folder = PolyReader_Get_Folder(ivars->polyreader);
        String *seg_name = Seg_Get_Name(ivars->segment);
        String *lex_temp_path = Str_newf("%o/lextemp", seg_name);
        String *post_temp_path = Str_newf("%o/ptemp", seg_name);
        ivars->lex_temp_in = Folder_Open_In(folder, lex_temp_path);
        if (!ivars->lex_temp_in) {
            RETHROW(INCREF(Err_get_error()));
        }
        ivars->post_temp_in = Folder_Open_In(folder, post_temp_path);
        if (!ivars->post_temp_in) {
            RETHROW(INCREF(Err_get_error()));
        }
        DECREF(lex_temp_path);
        DECREF(post_temp_path);
    }

    PostPool_Sort_Buffer(self);
    // If other runs exist AND the buffer still holds unsorted leftovers,
    // wrap those leftovers in a new run rather than flushing them to disk.
    if (num_runs && (ivars->buf_max - ivars->buf_tick) > 0) {
        uint32_t num_items = PostPool_Buffer_Count(self);
        // Cheap imitation of flush. FIXME.
        PostingPool *run
            = PostPool_new(ivars->schema, ivars->snapshot, ivars->segment,
                           ivars->polyreader, ivars->field, ivars->lex_writer,
                           ivars->mem_pool, ivars->lex_temp_out,
                           ivars->post_temp_out, ivars->skip_out);
        // Copy the live slice of the buffer into the new run, then zero out
        // this pool's buffer bounds -- ownership moves to `run`.
        PostPool_Grow_Buffer(run, num_items);
        PostingPoolIVARS *const run_ivars = PostPool_IVARS(run);
        memcpy(run_ivars->buffer, (ivars->buffer) + ivars->buf_tick,
               num_items * sizeof(Obj*));
        run_ivars->buf_max = num_items;
        PostPool_Add_Run(self, (SortExternal*)run);
        ivars->buf_tick = 0;
        ivars->buf_max = 0;
    }

    // Assign.  Give each run its memory slice; runs without a lexicon yet
    // get a fresh flip against the shared temp instreams.
    for (uint32_t i = 0; i < num_runs; i++) {
        PostingPool *run = (PostingPool*)VA_Fetch(ivars->runs, i);
        if (run != NULL) {
            PostPool_Set_Mem_Thresh(run, sub_thresh);
            if (!PostPool_IVARS(run)->lexicon) {
                S_fresh_flip(run, ivars->lex_temp_in, ivars->post_temp_in);
            }
        }
    }

    ivars->flipped = true;
}
// Initialize a DefaultHighlightReader: validate the segment's highlight
// metadata format (falling back to the legacy "term_vectors" key) and open
// the highlight.ix/.dat instreams if present.  On stream-open failure the
// half-built reader is destroyed before the Err is rethrown.
DefaultHighlightReader*
DefHLReader_init(DefaultHighlightReader *self, Schema *schema, Folder *folder,
                 Snapshot *snapshot, VArray *segments, int32_t seg_tick) {
    Segment *segment;
    Hash *metadata;
    HLReader_init((HighlightReader*)self, schema, folder, snapshot, segments,
                  seg_tick);
    segment = DefHLReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "highlight", 9);
    // Older indexes stored this data under "term_vectors".
    if (!metadata) {
        metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "term_vectors", 12);
    }

    // Check format.
    if (metadata) {
        Obj *format = Hash_Fetch_Str(metadata, "format", 6);
        if (!format) {
            THROW(ERR, "Missing 'format' var");
        }
        else {
            if (Obj_To_I64(format) != HLWriter_current_file_format) {
                THROW(ERR, "Unsupported highlight data format: %i64",
                      Obj_To_I64(format));
            }
        }
    }

    // Open instreams.  Presence of the .ix file gates opening both streams;
    // a segment without highlight data leaves ix_in/dat_in unset.
    {
        CharBuf *seg_name = Seg_Get_Name(segment);
        CharBuf *ix_file = CB_newf("%o/highlight.ix", seg_name);
        CharBuf *dat_file = CB_newf("%o/highlight.dat", seg_name);
        if (Folder_Exists(folder, ix_file)) {
            self->ix_in = Folder_Open_In(folder, ix_file);
            if (!self->ix_in) {
                // Grab the Err first, then release everything including self.
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
            self->dat_in = Folder_Open_In(folder, dat_file);
            if (!self->dat_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
        }
        DECREF(ix_file);
        DECREF(dat_file);
    }

    return self;
}
// Serialize this action-variable binding to a UTF-8 byte buffer:
// "(<var> New())" when no frame is bound, or "(<var> <frame>)" otherwise.
// The caller owns the returned DPtr.  Throws BadAllocException on allocation
// failure; intermediate buffers are dropped on every error path.
DPtr<uint8_t> *RIFActVarBind::toUTF8String() const throw(BadAllocException) {
  DPtr<uint8_t> *str;
  DPtr<uint8_t> *varstr;
  try {
    varstr = this->var.toUTF8String();
  } JUST_RETHROW(BadAllocException, "Cannot stringify RIFActVarBind.")
  // 3 bytes for '(' + ' ' + ')' framing around var (and frame, if any).
  size_t len = 3 + varstr->size();
  if (this->frame == NULL) {
    // No frame bound: emit "(<var> New())".  5 extra bytes cover "New()".
    len += 5;
    try {
      NEW(str, MPtr<uint8_t>, len);
    } catch (bad_alloc &e) {
      varstr->drop();
      THROW(BadAllocException, sizeof(MPtr<uint8_t>));
    } catch (BadAllocException &e) {
      varstr->drop();
      RETHROW(e, "Cannot allocate space for stringifying RIFActVarBind.");
    }
    uint8_t *write_to = str->dptr();
    *write_to = to_ascii('(');
    memcpy(++write_to, varstr->dptr(), varstr->size());
    write_to += varstr->size();
    varstr->drop();
    ascii_strncpy(write_to, " New())", 7);
    return str;
  }
  // Frame present: emit "(<var> <frame>)".
  DPtr<uint8_t> *framestr;
  try {
    framestr = this->frame->toUTF8String();
  } catch (BadAllocException &e) {
    varstr->drop();
    RETHROW(e, "Cannot stringify RIFActVarBind.");
  }
  len += framestr->size();
  try {
    NEW(str, MPtr<uint8_t>, len);
  } catch (bad_alloc &e) {
    varstr->drop();
    framestr->drop();
    THROW(BadAllocException, sizeof(MPtr<uint8_t>));
  } catch (BadAllocException &e) {
    varstr->drop();
    framestr->drop();
    RETHROW(e, "Cannot allocate space for stringifying RIFActVarBind.");
  }
  uint8_t *write_to = str->dptr();
  *write_to = to_ascii('(');
  memcpy(++write_to, varstr->dptr(), varstr->size());
  write_to += varstr->size();
  varstr->drop();
  *write_to = to_ascii(' ');
  memcpy(++write_to, framestr->dptr(), framestr->size());
  write_to += framestr->size();
  framestr->drop();
  *write_to = to_ascii(')');
  return str;
}
// Advance a log iterator to the next entry.  Skips whatever remains of the
// current entry's key/value, refills the block buffer, decodes the next
// entry's header, and updates the iterator state.  A closed iterator is a
// no-op; reaching end of data closes the iterator.
sparkey_returncode sparkey_logiter_next(sparkey_logiter *iter, sparkey_logreader *log) {
  if (iter->state == SPARKEY_ITER_CLOSED) {
    return SPARKEY_SUCCESS;
  }
  // Remember how much of the current entry is still unread so we can skip it.
  uint64_t key_remaining = 0;
  uint64_t value_remaining = 0;
  if (iter->state == SPARKEY_ITER_ACTIVE) {
    key_remaining = iter->key_remaining;
    value_remaining = iter->value_remaining;
  }
  // Mark invalid up front: any early error return leaves a safe state.
  iter->state = SPARKEY_ITER_INVALID;
  iter->key_remaining = 0;
  iter->value_remaining = 0;
  iter->keylen = 0;
  iter->valuelen = 0;
  RETHROW(assert_iter_open(iter, log));
  RETHROW(skip(iter, log, key_remaining));
  RETHROW(skip(iter, log, value_remaining));
  RETHROW(ensure_available(iter, log));
  if (iter->block_len - iter->block_offset == 0) {
    // Reached end of data
    iter->state = SPARKEY_ITER_CLOSED;
    return SPARKEY_SUCCESS;
  }
  // Uncompressed logs read directly from the mapped data; rebase the block
  // at the current offset and reset the per-block entry counter.
  if (log->header.compression_type == SPARKEY_COMPRESSION_NONE) {
    iter->block_position += iter->block_offset;
    iter->block_len -= iter->block_offset;
    iter->block_offset = 0;
    iter->compression_buf = &log->data[iter->block_position];
    iter->entry_count = -1;
  }
  iter->entry_count++;
  // Entry header: two varints.  a == 0 encodes a DELETE (b = keylen);
  // otherwise a PUT with keylen = a - 1 and valuelen = b.
  uint64_t a = read_vlq(iter->compression_buf, &iter->block_offset);
  uint64_t b = read_vlq(iter->compression_buf, &iter->block_offset);
  if (a == 0) {
    iter->keylen = iter->key_remaining = b;
    iter->valuelen = iter->value_remaining = 0;
    iter->type = SPARKEY_ENTRY_DELETE;
  } else {
    iter->keylen = iter->key_remaining = a - 1;
    iter->valuelen = iter->value_remaining = b;
    iter->type = SPARKEY_ENTRY_PUT;
  }
  iter->entry_block_position = iter->block_position;
  iter->entry_block_offset = iter->block_offset;
  iter->state = SPARKEY_ITER_ACTIVE;
  return SPARKEY_SUCCESS;
}
// Initialize a DefaultDocReader: validate the segment's "documents" metadata
// format and open the documents.ix/.dat instreams when present.  On stream
// open failure the partially built reader is destroyed and the Err rethrown.
DefaultDocReader*
DefDocReader_init(DefaultDocReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, Vector *segments, int32_t seg_tick) {
    Hash *metadata;
    Segment *segment;
    DocReader_init((DocReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    DefaultDocReaderIVARS *const ivars = DefDocReader_IVARS(self);
    segment = DefDocReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "documents", 9);

    if (metadata) {
        String *seg_name = Seg_Get_Name(segment);
        String *ix_file = Str_newf("%o/documents.ix", seg_name);
        String *dat_file = Str_newf("%o/documents.dat", seg_name);
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);

        // Check format.  Distinguish "too old" (regenerate the index) from
        // "unknown/newer" (unsupported).
        if (!format) { THROW(ERR, "Missing 'format' var"); }
        else {
            int64_t format_val = Json_obj_to_i64(format);
            if (format_val < DocWriter_current_file_format) {
                THROW(ERR, "Obsolete doc storage format %i64; "
                      "Index regeneration is required", format_val);
            }
            else if (format_val != DocWriter_current_file_format) {
                THROW(ERR, "Unsupported doc storage format: %i64", format_val);
            }
        }

        // Get streams.  The .ix file's existence gates both opens.
        if (Folder_Exists(folder, ix_file)) {
            ivars->ix_in = Folder_Open_In(folder, ix_file);
            if (!ivars->ix_in) {
                // Capture the Err, release filenames and self, then rethrow.
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
            ivars->dat_in = Folder_Open_In(folder, dat_file);
            if (!ivars->dat_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
        }
        DECREF(ix_file);
        DECREF(dat_file);
    }
    return self;
}
// Position a log iterator at an absolute file position.  Seeking to the
// exact end of data closes the iterator; otherwise the containing block is
// loaded and the iterator reset to the NEW state at that block.
sparkey_returncode sparkey_logiter_seek(sparkey_logiter *iter, sparkey_logreader *log, uint64_t position) {
  RETHROW(assert_iter_open(iter, log));
  if (position == log->header.data_end) {
    iter->state = SPARKEY_ITER_CLOSED;
    return SPARKEY_SUCCESS;
  }
  RETHROW(seekblock(iter, log, position));
  // Fresh block: entry counter restarts and state is NEW until next().
  iter->entry_count = -1;
  iter->state = SPARKEY_ITER_NEW;
  return SPARKEY_SUCCESS;
}
// Exercise FSFH_Window()/FSFH_Release_Window(): write a 4 KB file of "foo "
// repeats, then verify error handling for negative offsets and reads past
// EOF, correct windowed reads, and that releasing a window resets its state.
static void
test_Window(TestBatchRunner *runner) {
    String *test_filename = SSTR_WRAP_C("_fstest");
    FSFileHandle *fh;
    FileWindow *window = FileWindow_new();
    uint32_t i;

    // Start from a clean slate in case a previous run left the file behind.
    S_remove(test_filename);
    fh = FSFH_open(test_filename,
                   FH_CREATE | FH_WRITE_ONLY | FH_EXCLUSIVE);
    for (i = 0; i < 1024; i++) {
        FSFH_Write(fh, "foo ", 4);
    }
    if (!FSFH_Close(fh)) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Reopen for reading.
    DECREF(fh);
    fh = FSFH_open(test_filename, FH_READ_ONLY);
    if (!fh) {
        RETHROW(INCREF(Err_get_error()));
    }

    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Window(fh, window, -1, 4),
               "Window() with a negative offset returns false");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Window() with a negative offset sets error");

    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Window(fh, window, 4000, 1000),
               "Window() past EOF returns false");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Window() past EOF sets error");

    TEST_TRUE(runner, FSFH_Window(fh, window, 1021, 2),
              "Window() returns true");
    const char *buf = FileWindow_Get_Buf(window);
    int64_t offset = FileWindow_Get_Offset(window);
    // The window buffer is addressed relative to its offset, so index back
    // to file position 1021 ("oo" within a "foo " repeat).
    TEST_TRUE(runner, strncmp(buf - offset + 1021, "oo", 2) == 0, "Window()");

    TEST_TRUE(runner, FSFH_Release_Window(fh, window),
              "Release_Window() returns true");
    TEST_TRUE(runner, FileWindow_Get_Buf(window) == NULL,
              "Release_Window() resets buf");
    TEST_INT_EQ(runner, FileWindow_Get_Offset(window), 0,
                "Release_Window() resets offset");
    TEST_INT_EQ(runner, FileWindow_Get_Len(window), 0,
                "Release_Window() resets len");

    DECREF(window);
    DECREF(fh);
    S_remove(test_filename);
}
// Initialize a LexIndex for one field: open the field's lexicon .ixix and
// .ix instreams, derive the field type and term stepper, and map the .ixix
// offsets buffer.  On any failure, self is destroyed before the throw.
LexIndex*
LexIndex_init(LexIndex *self, Schema *schema, Folder *folder,
              Segment *segment, String *field) {
    int32_t  field_num = Seg_Field_Num(segment, field);
    String  *seg_name  = Seg_Get_Name(segment);
    String  *ixix_file = Str_newf("%o/lexicon-%i32.ixix", seg_name, field_num);
    String  *ix_file   = Str_newf("%o/lexicon-%i32.ix", seg_name, field_num);
    Architecture *arch = Schema_Get_Architecture(schema);

    // Init.
    Lex_init((Lexicon*)self, field);
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    ivars->tinfo = TInfo_new(0);
    ivars->tick  = 0;

    // Derive
    ivars->field_type = Schema_Fetch_Type(schema, field);
    if (!ivars->field_type) {
        String *mess = MAKE_MESS("Unknown field: '%o'", field);
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        Err_throw_mess(ERR, mess);
    }
    // Take our own reference: Schema_Fetch_Type returned a borrowed one.
    ivars->field_type = (FieldType*)INCREF(ivars->field_type);
    ivars->term_stepper = FType_Make_Term_Stepper(ivars->field_type);
    ivars->ixix_in = Folder_Open_In(folder, ixix_file);
    if (!ivars->ixix_in) {
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->ix_in = Folder_Open_In(folder, ix_file);
    if (!ivars->ix_in) {
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->index_interval = Arch_Index_Interval(arch);
    ivars->skip_interval  = Arch_Skip_Interval(arch);
    // The .ixix file is an array of i64 offsets into the .ix file; map the
    // whole thing and keep a pointer into the instream's buffer.
    ivars->size = (int32_t)(InStream_Length(ivars->ixix_in) / sizeof(int64_t));
    ivars->offsets = (const int64_t*)InStream_Buf(ivars->ixix_in,
            (size_t)InStream_Length(ivars->ixix_in));

    DECREF(ixix_file);
    DECREF(ix_file);

    return self;
}
// Initialize a DefaultHighlightReader (IVARS variant): validate the segment's
// highlight metadata format (falling back to the legacy "term_vectors" key)
// and open the highlight.ix/.dat instreams if present.  On stream-open
// failure, self is destroyed before the Err is rethrown.
DefaultHighlightReader*
DefHLReader_init(DefaultHighlightReader *self, Schema *schema, Folder *folder,
                 Snapshot *snapshot, Vector *segments, int32_t seg_tick) {
    HLReader_init((HighlightReader*)self, schema, folder, snapshot, segments,
                  seg_tick);
    DefaultHighlightReaderIVARS *const ivars = DefHLReader_IVARS(self);
    Segment *segment = DefHLReader_Get_Segment(self);
    Hash *metadata
        = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "highlight", 9);
    // Older indexes stored this data under "term_vectors".
    if (!metadata) {
        metadata
            = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "term_vectors", 12);
    }

    // Check format.
    if (metadata) {
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);
        if (!format) {
            THROW(ERR, "Missing 'format' var");
        }
        else {
            if (Json_obj_to_i64(format) != HLWriter_current_file_format) {
                THROW(ERR, "Unsupported highlight data format: %i64",
                      Json_obj_to_i64(format));
            }
        }
    }

    // Open instreams.  Existence of the .ix file gates opening both streams.
    String *seg_name = Seg_Get_Name(segment);
    String *ix_file  = Str_newf("%o/highlight.ix", seg_name);
    String *dat_file = Str_newf("%o/highlight.dat", seg_name);
    if (Folder_Exists(folder, ix_file)) {
        ivars->ix_in = Folder_Open_In(folder, ix_file);
        if (!ivars->ix_in) {
            Err *error = (Err*)INCREF(Err_get_error());
            DECREF(ix_file);
            DECREF(dat_file);
            DECREF(self);
            RETHROW(error);
        }
        ivars->dat_in = Folder_Open_In(folder, dat_file);
        if (!ivars->dat_in) {
            Err *error = (Err*)INCREF(Err_get_error());
            DECREF(ix_file);
            DECREF(dat_file);
            DECREF(self);
            RETHROW(error);
        }
    }
    DECREF(ix_file);
    DECREF(dat_file);

    return self;
}
// Exercise FSFH_Window()/FSFH_Release_Window() (IVARS variant that peeks at
// FileWindow internals directly): write a 4 KB file of "foo " repeats, check
// error handling for bad offsets, a valid windowed read, and window reset.
static void
test_Window(TestBatchRunner *runner) {
    String *test_filename = (String*)SSTR_WRAP_UTF8("_fstest", 7);
    FSFileHandle *fh;
    FileWindow *window = FileWindow_new();
    FileWindowIVARS *const window_ivars = FileWindow_IVARS(window);
    uint32_t i;

    // Start clean in case an earlier run left the file behind.
    remove(Str_Get_Ptr8(test_filename));
    fh = FSFH_open(test_filename,
                   FH_CREATE | FH_WRITE_ONLY | FH_EXCLUSIVE);
    for (i = 0; i < 1024; i++) {
        FSFH_Write(fh, "foo ", 4);
    }
    if (!FSFH_Close(fh)) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Reopen for reading.
    DECREF(fh);
    fh = FSFH_open(test_filename, FH_READ_ONLY);
    if (!fh) {
        RETHROW(INCREF(Err_get_error()));
    }

    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Window(fh, window, -1, 4),
               "Window() with a negative offset returns false");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Window() with a negative offset sets error");

    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Window(fh, window, 4000, 1000),
               "Window() past EOF returns false");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Window() past EOF sets error");

    TEST_TRUE(runner, FSFH_Window(fh, window, 1021, 2),
              "Window() returns true");
    // buf is addressed relative to the window's offset; index back to file
    // position 1021, which lands on "oo" within a "foo " repeat.
    TEST_TRUE(runner,
              strncmp(window_ivars->buf - window_ivars->offset + 1021,
                      "oo", 2) == 0, "Window()");

    TEST_TRUE(runner, FSFH_Release_Window(fh, window),
              "Release_Window() returns true");
    TEST_TRUE(runner, window_ivars->buf == NULL,
              "Release_Window() resets buf");
    TEST_TRUE(runner, window_ivars->offset == 0,
              "Release_Window() resets offset");
    TEST_TRUE(runner, window_ivars->len == 0,
              "Release_Window() resets len");

    DECREF(window);
    DECREF(fh);
    remove(Str_Get_Ptr8(test_filename));
}
// Flip the sort field writer from write mode to read mode.  Exactly one of
// two cases applies: buffered items get sorted in place, or existing runs get
// the temp instreams opened and a slice of the memory budget each.
void
SortFieldWriter_Flip_IMP(SortFieldWriter *self) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    uint32_t num_items = SortFieldWriter_Buffer_Count(self);
    uint32_t num_runs = Vec_Get_Size(ivars->runs);

    if (ivars->flipped) { THROW(ERR, "Can't call Flip() twice"); }
    // NOTE(review): flipped is set both here and at the end of the function;
    // the second assignment looks redundant -- confirm against upstream.
    ivars->flipped = true;

    // Sanity check.  Buffered items and runs are mutually exclusive here.
    if (num_runs && num_items) {
        THROW(ERR, "Sanity check failed: num_runs: %u32 num_items: %u32",
              num_runs, num_items);
    }

    if (num_items) {
        SortFieldWriter_Sort_Buffer(self);
    }
    else if (num_runs) {
        Folder  *folder = PolyReader_Get_Folder(ivars->polyreader);
        String *seg_name = Seg_Get_Name(ivars->segment);
        String *ord_path = Str_newf("%o/sort_ord_temp", seg_name);
        ivars->ord_in = Folder_Open_In(folder, ord_path);
        DECREF(ord_path);
        if (!ivars->ord_in) { RETHROW(INCREF(Err_get_error())); }
        // The .ix temp file only exists for variable-width field data.
        if (ivars->var_width) {
            String *ix_path = Str_newf("%o/sort_ix_temp", seg_name);
            ivars->ix_in = Folder_Open_In(folder, ix_path);
            DECREF(ix_path);
            if (!ivars->ix_in) { RETHROW(INCREF(Err_get_error())); }
        }
        String *dat_path = Str_newf("%o/sort_dat_temp", seg_name);
        ivars->dat_in = Folder_Open_In(folder, dat_path);
        DECREF(dat_path);
        if (!ivars->dat_in) { RETHROW(INCREF(Err_get_error())); }

        // Assign streams and a slice of mem_thresh.  Floor each slice at
        // 64 KB so tiny budgets don't starve a run.
        size_t sub_thresh = ivars->mem_thresh / num_runs;
        if (sub_thresh < 65536) { sub_thresh = 65536; }
        for (uint32_t i = 0; i < num_runs; i++) {
            SortFieldWriter *run
                = (SortFieldWriter*)Vec_Fetch(ivars->runs, i);
            S_flip_run(run, sub_thresh, ivars->ord_in, ivars->ix_in,
                       ivars->dat_in);
        }
    }

    ivars->flipped = true;
}
// Finish deletions writing: for each segment whose deletions were updated,
// serialize its BitVector of deleted docs to a per-segment file, then record
// the deletions metadata on the segment.
void
DefDelWriter_Finish_IMP(DefaultDeletionsWriter *self) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Folder *const folder = ivars->folder;

    for (uint32_t i = 0, max = VA_Get_Size(ivars->seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(ivars->seg_readers, i);
        if (ivars->updated[i]) {
            BitVector *deldocs = (BitVector*)VA_Fetch(ivars->bit_vecs, i);
            int32_t doc_max = SegReader_Doc_Max(seg_reader);
            // Round the bit count up to whole bytes; doc ids start at 1, so
            // doc_max + 1 bits are needed, and new_max is the highest bit
            // index representable in byte_size bytes.
            double used = (doc_max + 1) / 8.0;
            uint32_t byte_size = (uint32_t)ceil(used);
            uint32_t new_max = byte_size * 8 - 1;
            String *filename = S_del_filename(self, seg_reader);
            OutStream *outstream = Folder_Open_Out(folder, filename);
            if (!outstream) { RETHROW(INCREF(Err_get_error())); }

            // Ensure that we have 1 bit for each doc in segment.
            BitVec_Grow(deldocs, new_max);

            // Write deletions data and clean up.
            OutStream_Write_Bytes(outstream,
                                  (char*)BitVec_Get_Raw_Bits(deldocs),
                                  byte_size);
            OutStream_Close(outstream);
            DECREF(outstream);
            DECREF(filename);
        }
    }

    Seg_Store_Metadata_Utf8(ivars->segment, "deletions", 9,
                            (Obj*)DefDelWriter_Metadata(self));
}
// Consolidate the files under `path` into a compound file, then (unless path
// is the folder root) swap a CompoundFileReader into the enclosing folder's
// entries in place of the plain folder.  Throws if path can't be found or
// was already consolidated.
void
Folder_consolidate(Folder *self, const CharBuf *path) {
    Folder *folder = Folder_Find_Folder(self, path);
    Folder *enclosing_folder = Folder_Enclosing_Folder(self, path);
    if (!folder) {
        THROW(ERR, "Can't consolidate %o", path);
    }
    else if (Folder_Is_A(folder, COMPOUNDFILEREADER)) {
        THROW(ERR, "Can't consolidate %o twice", path);
    }
    else {
        CompoundFileWriter *cf_writer = CFWriter_new(folder);
        CFWriter_Consolidate(cf_writer);
        DECREF(cf_writer);
        // Replace the folder's entry with a reader over the new compound
        // file.  An empty path means the root folder, which has no
        // enclosing entry to replace.
        if (CB_Get_Size(path)) {
            ZombieCharBuf *name = IxFileNames_local_part(path, ZCB_BLANK());
            CompoundFileReader *cf_reader = CFReader_open(folder);
            if (!cf_reader) { RETHROW(INCREF(Err_get_error())); }
            Hash_Store(enclosing_folder->entries, (Obj*)name,
                       (Obj*)cf_reader);
        }
    }
}
// Scan the top level of `folder` for "snapshot_*.json" files and return a
// fresh String naming the one with the highest generation number, or NULL
// when no snapshot file exists.  Caller takes ownership of the result.
String*
IxFileNames_latest_snapshot(Folder *folder) {
    DirHandle *dh = Folder_Open_Dir(folder, NULL);
    if (!dh) { RETHROW(INCREF(Err_get_error())); }

    String  *best     = NULL;
    uint64_t best_gen = 0;

    while (DH_Next(dh)) {
        String *name = DH_Get_Entry(dh);
        // Only consider entries shaped like "snapshot_<gen>.json".
        if (!Str_Starts_With_Utf8(name, "snapshot_", 9)
            || !Str_Ends_With_Utf8(name, ".json", 5)
           ) {
            DECREF(name);
            continue;
        }
        uint64_t gen = IxFileNames_extract_gen(name);
        if (gen > best_gen) {
            // New winner: drop the previous candidate and clone this one.
            best_gen = gen;
            DECREF(best);
            best = Str_Clone(name);
        }
        DECREF(name);
    }

    DECREF(dh);
    return best;
}
// Produce the filename for the next snapshot: scan existing
// "snapshot_*.json" entries for the highest generation, then return
// "snapshot_<gen+1 in base36>.json".  Caller owns the returned String.
String*
IxManager_Make_Snapshot_Filename_IMP(IndexManager *self) {
    IndexManagerIVARS *const ivars = IxManager_IVARS(self);
    Folder *folder = (Folder*)CERTIFY(ivars->folder, FOLDER);
    DirHandle *dh = Folder_Open_Dir(folder, NULL);
    if (!dh) { RETHROW(INCREF(Err_get_error())); }

    uint64_t highest = 0;
    while (DH_Next(dh)) {
        String *name = DH_Get_Entry(dh);
        // Only "snapshot_<gen>.json" entries participate.
        if (Str_Starts_With_Utf8(name, "snapshot_", 9)
            && Str_Ends_With_Utf8(name, ".json", 5)
           ) {
            uint64_t gen = IxFileNames_extract_gen(name);
            if (gen > highest) { highest = gen; }
        }
        DECREF(name);
    }
    DECREF(dh);

    // Encode the successor generation in base 36 for a compact filename.
    uint64_t next_gen = highest + 1;
    char base36[StrHelp_MAX_BASE36_BYTES];
    StrHelp_to_base36(next_gen, &base36);
    return Str_newf("snapshot_%s.json", &base36);
}
// Refill the InStream's read buffer with at least `amount` bytes by asking
// the file handle for a window over the underlying file.  Adjusts buf/limit
// so reads are clamped to the virtual file's remaining length.  Throws on
// attempts to read past EOF or on window failure.
static void
S_fill(InStream *self, int64_t amount) {
    FileWindow *const window = self->window;
    const int64_t virtual_file_pos = SI_tell(self);
    // self->offset is where this virtual file starts inside the real file.
    const int64_t real_file_pos    = virtual_file_pos + self->offset;
    const int64_t remaining        = self->len - virtual_file_pos;

    // Throw an error if the requested amount would take us beyond EOF.
    if (amount > remaining) {
        THROW(ERR,
              "Read past EOF of %o (pos: %u64 len: %u64 request: %u64)",
              self->filename, virtual_file_pos, self->len, amount);
    }

    // Make the request.
    if (FH_Window(self->file_handle, window, real_file_pos, amount)) {
        char *const window_limit = window->buf + window->len;
        // Rebase buf so that self->buf points at the current read position
        // inside the window's buffer.
        self->buf = window->buf
                    - window->offset    // theoretical start of real file
                    + self->offset      // top of virtual file
                    + virtual_file_pos; // position within virtual file
        // Clamp the readable limit to the virtual file's end.
        self->limit = window_limit - self->buf > remaining
                      ? self->buf + remaining
                      : window_limit;
    }
    else {
        // Append the filename to the pending Err's message for context.
        Err *error = Err_get_error();
        CB_catf(Err_Get_Mess(error), " (%o)", self->filename);
        RETHROW(INCREF(error));
    }
}
// Commit all changes: prepare if not already prepared, atomically rename the
// temp snapshot file into place, purge obsolete files, and release the merge
// and write locks (which invalidates the Indexer).  Throws if called twice.
void
Indexer_commit(Indexer *self) {
    // Safety check.  The write lock is released at the end of commit, so a
    // missing lock means commit already ran.
    if ( !self->write_lock ) {
        THROW(ERR, "Can't call commit() more than once");
    }

    if (!self->prepared) {
        Indexer_Prepare_Commit(self);
    }

    if (self->needs_commit) {
        bool_t success;

        // Rename temp snapshot file.  snapfile currently ends in ".temp";
        // chop the suffix to get the final name, then rename on disk.
        CharBuf *temp_snapfile = CB_Clone(self->snapfile);
        CB_Chop(self->snapfile, sizeof(".temp") - 1);
        Snapshot_Set_Path(self->snapshot, self->snapfile);
        success = Folder_Rename(self->folder, temp_snapfile, self->snapfile);
        DECREF(temp_snapfile);
        if (!success) { RETHROW(INCREF(Err_get_error())); }

        // Purge obsolete files.
        FilePurger_Purge(self->file_purger);
    }

    // Release locks, invalidating the Indexer.
    S_release_merge_lock(self);
    S_release_write_lock(self);
}
// Run the Snowball stemmer against the shared JSON test corpus: for each
// language, stem every word and verify the result matches the expected stem.
static void
test_stemming(TestBatchRunner *runner) {
    FSFolder *modules_folder = TestUtils_modules_folder();
    String *path = Str_newf("analysis/snowstem/source/test/tests.json");
    Hash *tests = (Hash*)Json_slurp_json((Folder*)modules_folder, path);
    if (!tests) {
        // RETHROW takes ownership of its argument, so the pending Err must
        // be INCREF'd first (matching every other RETHROW call site);
        // passing the borrowed reference would over-release it.
        RETHROW(INCREF(Err_get_error()));
    }

    String *iso;
    Hash *lang_data;
    Hash_Iterate(tests);
    while (Hash_Next(tests, (Obj**)&iso, (Obj**)&lang_data)) {
        VArray *words = (VArray*)Hash_Fetch_Utf8(lang_data, "words", 5);
        VArray *stems = (VArray*)Hash_Fetch_Utf8(lang_data, "stems", 5);
        SnowballStemmer *stemmer = SnowStemmer_new(iso);
        for (uint32_t i = 0, max = VA_Get_Size(words); i < max; i++) {
            String *word = (String*)VA_Fetch(words, i);
            VArray *got  = SnowStemmer_Split(stemmer, word);
            String *stem = (String*)VA_Fetch(got, 0);
            // Each input word must stem to exactly the expected String.
            TEST_TRUE(runner,
                      stem
                      && Str_Is_A(stem, STRING)
                      && Str_Equals(stem, VA_Fetch(stems, i)),
                      "Stem %s: %s", Str_Get_Ptr8(iso), Str_Get_Ptr8(word)
                     );
            DECREF(got);
        }
        DECREF(stemmer);
    }

    DECREF(tests);
    DECREF(modules_folder);
    DECREF(path);
}
// Serialize this RIF constant to UTF-8 in the form "<escaped-lex>"^^<iri>.
// The caller owns the returned buffer.  Throws BadAllocException on
// allocation failure; intermediate buffers are dropped on every error path.
DPtr<uint8_t> *RIFConst::toUTF8String() const throw(BadAllocException) {
  DPtr<uint8_t> *esclex;
  try {
    esclex = RIFConst::escape(this->lex);
  } JUST_RETHROW(BadAllocException, "(rethrow)")
  DPtr<uint8_t> *iristr = this->datatype.getUTF8String();
  // 6 extra bytes: opening '"', the 4-byte "\"^^<" separator run counted
  // as '"' + '^' + '^' + '<', and the closing '>'.
  size_t sz = esclex->size() + iristr->size() + 6;
  DPtr<uint8_t> *utf8str;
  try {
    NEW(utf8str, MPtr<uint8_t>, sz);
  } catch (BadAllocException &e) {
    esclex->drop();
    iristr->drop();
    RETHROW(e, "(rethrow)");
  } catch (bad_alloc &e) {
    esclex->drop();
    iristr->drop();
    THROW(BadAllocException, sizeof(MPtr<uint8_t>));
  }
  // Assemble: '"' <escaped lex> '"^^<' <iri> '>'
  uint8_t *utf8p = utf8str->dptr();
  *utf8p = to_ascii('"');
  memcpy(++utf8p, esclex->dptr(), esclex->size() * sizeof(uint8_t));
  utf8p += esclex->size();
  ascii_strcpy(utf8p, "\"^^<");
  utf8p += 4;
  memcpy(utf8p, iristr->dptr(), iristr->size() * sizeof(uint8_t));
  utf8p += iristr->size();
  *utf8p = to_ascii('>');
  esclex->drop();
  iristr->drop();
  return utf8str;
}
// Refill the InStream's read buffer with at least `amount` bytes via a file
// window (IVARS variant using FileWindow accessors).  Rebases buf/limit so
// reads are clamped to the virtual file's remaining length.  Throws on reads
// past EOF or window failure.
static void
S_fill(InStream *self, int64_t amount) {
    InStreamIVARS *const ivars = InStream_IVARS(self);
    FileWindow *const window = ivars->window;
    const int64_t virtual_file_pos = SI_tell(self);
    // ivars->offset is where this virtual file begins in the real file.
    const int64_t real_file_pos    = virtual_file_pos + ivars->offset;
    const int64_t remaining        = ivars->len - virtual_file_pos;

    // Throw an error if the requested amount would take us beyond EOF.
    if (amount > remaining) {
        THROW(ERR,
              "Read past EOF of %o (pos: %u64 len: %u64 request: %u64)",
              ivars->filename, virtual_file_pos, ivars->len, amount);
    }

    // Make the request.
    if (FH_Window(ivars->file_handle, window, real_file_pos, amount)) {
        char *fw_buf = FileWindow_Get_Buf(window);
        int64_t fw_offset = FileWindow_Get_Offset(window);
        int64_t fw_len = FileWindow_Get_Len(window);
        char *const window_limit = fw_buf + fw_len;
        // Rebase buf so it points at the current read position inside the
        // window's buffer.
        ivars->buf = fw_buf
                     - fw_offset         // theoretical start of real file
                     + ivars->offset     // top of virtual file
                     + virtual_file_pos; // position within virtual file
        // Clamp the readable limit to the virtual file's end.
        ivars->limit = window_limit - ivars->buf > remaining
                       ? ivars->buf + remaining
                       : window_limit;
    }
    else {
        // Append the filename to the pending Err's message for context.
        Err *error = Err_get_error();
        CB_catf(Err_Get_Mess(error), " (%o)", ivars->filename);
        RETHROW(INCREF(error));
    }
}
// Read the entire file at `path` into a new NUL-terminated ByteBuf, which
// the caller owns.  Throws if the file can't be opened or is too large to
// fit in a size_t.
ByteBuf*
Folder_slurp_file(Folder *self, const CharBuf *path) {
    InStream *instream = Folder_Open_In(self, path);
    ByteBuf  *retval   = NULL;

    if (!instream) {
        RETHROW(INCREF(Err_get_error()));
    }
    else {
        uint64_t length = InStream_Length(instream);

        // Guard against files whose 64-bit length can't be addressed on
        // this platform.
        if (length >= SIZE_MAX) {
            InStream_Close(instream);
            DECREF(instream);
            THROW(ERR, "File %o is too big to slurp (%u64 bytes)", path,
                  length);
        }
        else {
            size_t size = (size_t)length;
            // Allocate one extra byte for the NUL terminator; the ByteBuf
            // steals the allocation (cap = size + 1).
            char *ptr = (char*)MALLOCATE((size_t)size + 1);
            InStream_Read_Bytes(instream, ptr, size);
            ptr[size] = '\0';
            retval = BB_new_steal_bytes(ptr, size, size + 1);
            InStream_Close(instream);
            DECREF(instream);
        }
    }

    return retval;
}
// Ask the underlying file handle to grow the file to at least `length`
// bytes; rethrow the pending Err if the handle reports failure.
void
OutStream_Grow_IMP(OutStream *self, int64_t length) {
    OutStreamIVARS *const ivars = OutStream_IVARS(self);
    bool grew = FH_Grow(ivars->file_handle, length);
    if (!grew) {
        RETHROW(INCREF(Err_get_error()));
    }
}
// Produce the filename for the next snapshot (legacy CharBuf API): find the
// highest generation among existing "snapshot_*.json" entries, then return
// "snapshot_<gen+1 in base36>.json".  Caller owns the returned CharBuf.
CharBuf*
IxManager_make_snapshot_filename(IndexManager *self) {
    Folder *folder = (Folder*)CERTIFY(self->folder, FOLDER);
    DirHandle *dh = Folder_Open_Dir(folder, NULL);
    CharBuf *entry;
    uint64_t max_gen = 0;

    if (!dh) { RETHROW(INCREF(Err_get_error())); }
    // NOTE(review): Get_Entry is called once, outside the loop -- this
    // presumably returns a buffer that DH_Next mutates in place in this API
    // version (the modern variant fetches a fresh entry per iteration);
    // confirm against the DirHandle implementation.
    entry = DH_Get_Entry(dh);
    while (DH_Next(dh)) {
        if (   CB_Starts_With_Str(entry, "snapshot_", 9)
            && CB_Ends_With_Str(entry, ".json", 5)
        ) {
            uint64_t gen = IxFileNames_extract_gen(entry);
            if (gen > max_gen) { max_gen = gen; }
        }
    }
    DECREF(dh);

    // Encode the successor generation in base 36 for a compact filename.
    {
        uint64_t new_gen = max_gen + 1;
        char base36[StrHelp_MAX_BASE36_BYTES];
        StrHelp_to_base36(new_gen, &base36);
        return CB_newf("snapshot_%s.json", &base36);
    }
}
// Initialize a SegReader: record segment basics (doc count, name, number),
// trap any error raised while initializing sub-components so self can be
// cleaned up before rethrowing, then cache the deletions count.
SegReader*
SegReader_init(SegReader *self, Schema *schema, Folder *folder,
               Snapshot *snapshot, Vector *segments, int32_t seg_tick) {
    Segment *segment;

    IxReader_init((IndexReader*)self, schema, folder, snapshot, segments,
                  seg_tick, NULL);
    SegReaderIVARS *const ivars = SegReader_IVARS(self);
    segment = SegReader_Get_Segment(self);

    ivars->doc_max  = (int32_t)Seg_Get_Count(segment);
    ivars->seg_name = (String*)INCREF(Seg_Get_Name(segment));
    ivars->seg_num  = Seg_Get_Number(segment);

    // Err_trap runs component init inside an error handler so a failure
    // can't leak the half-built reader.
    Err *error = Err_trap(S_try_init_components, self);
    if (error) {
        // An error occurred, so clean up self and rethrow the exception.
        DECREF(self);
        RETHROW(error);
    }

    DeletionsReader *del_reader
        = (DeletionsReader*)Hash_Fetch(ivars->components,
                                       Class_Get_Name(DELETIONSREADER));
    ivars->del_count = del_reader ? DelReader_Del_Count(del_reader) : 0;

    return self;
}
// Advance the iterator `count` entries, stopping early (and propagating the
// code) if any step fails.  A non-positive count is a no-op.
sparkey_returncode sparkey_logiter_skip(sparkey_logiter *iter, sparkey_logreader *log, int count) {
  for (int remaining = count; remaining > 0; remaining--) {
    RETHROW(sparkey_logiter_next(iter, log));
  }
  return SPARKEY_SUCCESS;
}