static Schema* S_create_schema() { // Create a new schema. Schema *schema = Schema_new(); // Create an analyzer. String *language = Str_newf("en"); EasyAnalyzer *analyzer = EasyAnalyzer_new(language); // Specify fields. FullTextType *type = FullTextType_new((Analyzer*)analyzer); { String *field_str = Str_newf("title"); Schema_Spec_Field(schema, field_str, (FieldType*)type); DECREF(field_str); } { String *field_str = Str_newf("content"); Schema_Spec_Field(schema, field_str, (FieldType*)type); DECREF(field_str); } DECREF(language); DECREF(analyzer); DECREF(type); return schema; }
// Open an InStream for `name`.  If the file is recorded inside the compound
// file, return a reopened slice of the shared compound-file instream;
// otherwise delegate to the real folder.  Returns NULL (with the global
// error set) on a malformed compound-file entry.
InStream*
CFReader_Local_Open_In_IMP(CompoundFileReader *self, String *name) {
    CompoundFileReaderIVARS *const ivars = CFReader_IVARS(self);
    Hash *entry = (Hash*)Hash_Fetch(ivars->records, name);

    if (!entry) {
        // Not part of the compound file -- fall back to the real folder.
        InStream *instream = Folder_Local_Open_In(ivars->real_folder, name);
        if (!instream) {
            ERR_ADD_FRAME(Err_get_error());
        }
        return instream;
    }
    else {
        Obj *len    = Hash_Fetch_Utf8(entry, "length", 6);
        Obj *offset = Hash_Fetch_Utf8(entry, "offset", 6);
        if (!len || !offset) {
            // Entry record is missing required keys.
            Err_set_error(Err_new(Str_newf("Malformed entry for '%o' in '%o'",
                                           name,
                                           Folder_Get_Path(ivars->real_folder))));
            return NULL;
        }
        else if (Str_Get_Size(ivars->path)) {
            // Qualify the name with this reader's path.
            String *fullpath = Str_newf("%o/%o", ivars->path, name);
            InStream *instream = InStream_Reopen(ivars->instream, fullpath,
                                                 Obj_To_I64(offset),
                                                 Obj_To_I64(len));
            DECREF(fullpath);
            return instream;
        }
        else {
            return InStream_Reopen(ivars->instream, name, Obj_To_I64(offset),
                                   Obj_To_I64(len));
        }
    }
}
// Add one document with "title" and "content" fields to the indexer.
static void
S_add_document(Indexer *indexer, const char *title, const char *content) {
    Doc *doc = Doc_new(NULL, 0);

    // Store both fields through a single loop over parallel name/value arrays.
    const char *names[]  = { "title", "content" };
    const char *values[] = { title,   content   };
    for (int i = 0; i < 2; i++) {
        String *field_str = Str_newf("%s", names[i]);
        String *value_str = Str_new_from_utf8(values[i], strlen(values[i]));
        Doc_Store(doc, field_str, (Obj*)value_str);
        DECREF(value_str);
        DECREF(field_str);
    }

    Indexer_Add_Doc(indexer, doc, 1.0);
    DECREF(doc);
}
// Serialize this type for the schema file.  Only attributes that differ
// from their defaults are written, in addition to the mandatory "type" key.
Hash*
FullTextType_Dump_For_Schema_IMP(FullTextType *self) {
    FullTextTypeIVARS *const ivars = FullTextType_IVARS(self);
    Hash *dump_hash = Hash_new(0);
    Hash_Store_Utf8(dump_hash, "type", 4, (Obj*)Str_newf("fulltext"));

    // Non-default boost is stored as a stringified double.
    if (ivars->boost != 1.0) {
        Hash_Store_Utf8(dump_hash, "boost", 5,
                        (Obj*)Str_newf("%f64", ivars->boost));
    }
    // Booleans: defaults are indexed=true, stored=true, sortable=false,
    // highlightable=false.
    if (!ivars->indexed) {
        Hash_Store_Utf8(dump_hash, "indexed", 7, (Obj*)CFISH_FALSE);
    }
    if (!ivars->stored) {
        Hash_Store_Utf8(dump_hash, "stored", 6, (Obj*)CFISH_FALSE);
    }
    if (ivars->sortable) {
        Hash_Store_Utf8(dump_hash, "sortable", 8, (Obj*)CFISH_TRUE);
    }
    if (ivars->highlightable) {
        Hash_Store_Utf8(dump_hash, "highlightable", 13, (Obj*)CFISH_TRUE);
    }

    return dump_hash;
}
// Lazily open the highlight index (.ix) and data (.dat) outstreams the first
// time output is needed.  Returns the data outstream.
static OutStream*
S_lazy_init(HighlightWriter *self) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);
    if (!ivars->dat_out) {
        Segment *segment = ivars->segment;
        Folder *folder = ivars->folder;
        String *seg_name = Seg_Get_Name(segment);

        // Open outstreams.
        String *ix_file = Str_newf("%o/highlight.ix", seg_name);
        ivars->ix_out = Folder_Open_Out(folder, ix_file);
        DECREF(ix_file);
        if (!ivars->ix_out) {
            RETHROW(INCREF(Err_get_error()));
        }
        String *dat_file = Str_newf("%o/highlight.dat", seg_name);
        ivars->dat_out = Folder_Open_Out(folder, dat_file);
        DECREF(dat_file);
        if (!ivars->dat_out) {
            RETHROW(INCREF(Err_get_error()));
        }

        // Go past invalid doc 0.
        OutStream_Write_I64(ivars->ix_out, 0);
    }
    return ivars->dat_out;
}
// Flip the PostingPool from accumulation mode to fetch mode.  Leftover
// buffered postings become one final in-memory run; on-disk runs get shared
// temp instreams and an even slice of the memory threshold.
void
PostPool_Flip_IMP(PostingPool *self) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    uint32_t num_runs = VA_Get_Size(ivars->runs);
    // Split the memory budget across runs; guard against division by zero.
    uint32_t sub_thresh = num_runs > 0
                          ? ivars->mem_thresh / num_runs
                          : ivars->mem_thresh;

    if (num_runs) {
        Folder *folder = PolyReader_Get_Folder(ivars->polyreader);
        String *seg_name = Seg_Get_Name(ivars->segment);
        String *lex_temp_path = Str_newf("%o/lextemp", seg_name);
        String *post_temp_path = Str_newf("%o/ptemp", seg_name);
        // Reopen the temp files written during the accumulation phase.
        ivars->lex_temp_in = Folder_Open_In(folder, lex_temp_path);
        if (!ivars->lex_temp_in) {
            RETHROW(INCREF(Err_get_error()));
        }
        ivars->post_temp_in = Folder_Open_In(folder, post_temp_path);
        if (!ivars->post_temp_in) {
            RETHROW(INCREF(Err_get_error()));
        }
        DECREF(lex_temp_path);
        DECREF(post_temp_path);
    }

    PostPool_Sort_Buffer(self);
    if (num_runs && (ivars->buf_max - ivars->buf_tick) > 0) {
        uint32_t num_items = PostPool_Buffer_Count(self);
        // Cheap imitation of flush. FIXME.
        PostingPool *run
            = PostPool_new(ivars->schema, ivars->snapshot, ivars->segment,
                           ivars->polyreader, ivars->field, ivars->lex_writer,
                           ivars->mem_pool, ivars->lex_temp_out,
                           ivars->post_temp_out, ivars->skip_out);
        PostPool_Grow_Buffer(run, num_items);
        PostingPoolIVARS *const run_ivars = PostPool_IVARS(run);
        // Hand the leftover buffered postings off to the new run; ownership
        // of the Obj pointers transfers with the raw copy.
        memcpy(run_ivars->buffer, (ivars->buffer) + ivars->buf_tick,
               num_items * sizeof(Obj*));
        run_ivars->buf_max = num_items;
        PostPool_Add_Run(self, (SortExternal*)run);
        ivars->buf_tick = 0;
        ivars->buf_max = 0;
    }

    // Assign.
    for (uint32_t i = 0; i < num_runs; i++) {
        PostingPool *run = (PostingPool*)VA_Fetch(ivars->runs, i);
        if (run != NULL) {
            PostPool_Set_Mem_Thresh(run, sub_thresh);
            if (!PostPool_IVARS(run)->lexicon) {
                S_fresh_flip(run, ivars->lex_temp_in, ivars->post_temp_in);
            }
        }
    }

    ivars->flipped = true;
}
// Create the shared true/false singletons.
// NOTE(review): there is no synchronization here -- the unconditional stores
// into the globals race if this can run from multiple threads.  Presumably
// it is only invoked once during single-threaded bootstrap; confirm.
void
Bool_init_class() {
    Bool_true_singleton = (BoolNum*)VTable_Make_Obj(BOOLNUM);
    Bool_true_singleton->value = true;
    Bool_true_singleton->string = Str_newf("true");
    Bool_false_singleton = (BoolNum*)VTable_Make_Obj(BOOLNUM);
    Bool_false_singleton->value = false;
    Bool_false_singleton->string = Str_newf("false");
}
// Initialize a DefaultDocReader over one segment.  Validates the "documents"
// metadata's file format and opens the documents.ix / documents.dat
// instreams when the segment has stored documents.
DefaultDocReader*
DefDocReader_init(DefaultDocReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, Vector *segments, int32_t seg_tick) {
    Hash *metadata;
    Segment *segment;
    DocReader_init((DocReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    DefaultDocReaderIVARS *const ivars = DefDocReader_IVARS(self);
    segment = DefDocReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "documents", 9);
    if (metadata) {
        String *seg_name = Seg_Get_Name(segment);
        String *ix_file = Str_newf("%o/documents.ix", seg_name);
        String *dat_file = Str_newf("%o/documents.dat", seg_name);
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);

        // Check format.
        // NOTE(review): the THROWs below bypass the DECREFs of ix_file,
        // dat_file, and self -- confirm leaking on this fatal path is
        // acceptable.
        if (!format) {
            THROW(ERR, "Missing 'format' var");
        }
        else {
            int64_t format_val = Json_obj_to_i64(format);
            if (format_val < DocWriter_current_file_format) {
                THROW(ERR, "Obsolete doc storage format %i64; "
                      "Index regeneration is required", format_val);
            }
            else if (format_val != DocWriter_current_file_format) {
                THROW(ERR, "Unsupported doc storage format: %i64",
                      format_val);
            }
        }

        // Get streams.
        if (Folder_Exists(folder, ix_file)) {
            ivars->ix_in = Folder_Open_In(folder, ix_file);
            if (!ivars->ix_in) {
                // Release local refs and self before rethrowing.
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
            ivars->dat_in = Folder_Open_In(folder, dat_file);
            if (!ivars->dat_in) {
                Err *error = (Err*)INCREF(Err_get_error());
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                RETHROW(error);
            }
        }
        DECREF(ix_file);
        DECREF(dat_file);
    }
    return self;
}
// Commit the background merge: promote the temp snapshot to its permanent
// name, release the merge lock, remove the merge data file, purge obsolete
// index files, and finally release the write lock.
void
BGMerger_Commit_IMP(BackgroundMerger *self) {
    BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);

    // Safety check.
    if (!ivars->merge_lock) {
        THROW(ERR, "Can't call commit() more than once");
    }

    if (!ivars->prepared) {
        BGMerger_Prepare_Commit(self);
    }

    if (ivars->needs_commit) {
        bool success = false;
        // Take over the old ref; ivars->snapfile is replaced below.
        String *temp_snapfile = ivars->snapfile;

        // Rename temp snapshot file: strip the ".temp" suffix and hard-link
        // the permanent name to the temp file's contents.
        size_t ext_len = sizeof(".temp") - 1;
        size_t snapfile_len = Str_Length(temp_snapfile);
        if (snapfile_len <= ext_len) {
            THROW(ERR, "Invalid snapfile name: %o", temp_snapfile);
        }
        ivars->snapfile = Str_SubString(temp_snapfile, 0,
                                        snapfile_len - ext_len);
        success = Folder_Hard_Link(ivars->folder, temp_snapfile,
                                   ivars->snapfile);
        Snapshot_Set_Path(ivars->snapshot, ivars->snapfile);
        if (!success) {
            String *mess = Str_newf("Can't create hard link from %o to %o",
                                    temp_snapfile, ivars->snapfile);
            DECREF(temp_snapfile);
            Err_throw_mess(ERR, mess);
        }
        if (!Folder_Delete(ivars->folder, temp_snapfile)) {
            String *mess = Str_newf("Can't delete %o", temp_snapfile);
            DECREF(temp_snapfile);
            Err_throw_mess(ERR, mess);
        }
        DECREF(temp_snapfile);
    }

    // Release the merge lock and remove the merge data file.
    S_release_merge_lock(self);
    IxManager_Remove_Merge_Data(ivars->manager);

    if (ivars->needs_commit) {
        // Purge obsolete files.
        FilePurger_Purge(ivars->file_purger);
    }

    // Release the write lock.
    S_release_write_lock(self);
}
// Verify that CaseFolder lowercases plain text.
static void
test_analysis(TestBatchRunner *runner) {
    CaseFolder *case_folder = CaseFolder_new();
    String     *source      = Str_newf("caPiTal ofFensE");
    VArray     *expected    = VA_new(1);

    VA_Push(expected, (Obj*)Str_newf("capital offense"));
    TestUtils_test_analyzer(runner, (Analyzer*)case_folder, source, expected,
                            "lowercase plain text");

    DECREF(case_folder);
    DECREF(source);
    DECREF(expected);
}
// Initialize a LexIndex for one field of one segment.  Opens the
// lexicon-<field_num>.ixix offset stream and lexicon-<field_num>.ix term
// stream, then maps the offset table directly from the .ixix buffer.
LexIndex*
LexIndex_init(LexIndex *self, Schema *schema, Folder *folder,
              Segment *segment, String *field) {
    int32_t field_num = Seg_Field_Num(segment, field);
    String *seg_name = Seg_Get_Name(segment);
    String *ixix_file = Str_newf("%o/lexicon-%i32.ixix", seg_name, field_num);
    String *ix_file = Str_newf("%o/lexicon-%i32.ix", seg_name, field_num);
    Architecture *arch = Schema_Get_Architecture(schema);

    // Init.
    Lex_init((Lexicon*)self, field);
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    ivars->tinfo = TInfo_new(0);
    ivars->tick = 0;

    // Derive
    ivars->field_type = Schema_Fetch_Type(schema, field);
    if (!ivars->field_type) {
        // Unknown field: release local refs and self, then throw.
        String *mess = MAKE_MESS("Unknown field: '%o'", field);
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        Err_throw_mess(ERR, mess);
    }
    // Take a ref on the borrowed type before storing it.
    ivars->field_type = (FieldType*)INCREF(ivars->field_type);
    ivars->term_stepper = FType_Make_Term_Stepper(ivars->field_type);
    ivars->ixix_in = Folder_Open_In(folder, ixix_file);
    if (!ivars->ixix_in) {
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->ix_in = Folder_Open_In(folder, ix_file);
    if (!ivars->ix_in) {
        Err *error = (Err*)INCREF(Err_get_error());
        DECREF(ix_file);
        DECREF(ixix_file);
        DECREF(self);
        RETHROW(error);
    }
    ivars->index_interval = Arch_Index_Interval(arch);
    ivars->skip_interval = Arch_Skip_Interval(arch);
    // One 64-bit offset per indexed term.
    ivars->size = (int32_t)(InStream_Length(ivars->ixix_in)
                            / sizeof(int64_t));
    // Points into the instream's internal buffer; valid for the lifetime of
    // ixix_in.
    ivars->offsets = (const int64_t*)InStream_Buf(ivars->ixix_in,
                                                  (size_t)InStream_Length(ivars->ixix_in));
    DECREF(ixix_file);
    DECREF(ix_file);
    return self;
}
// Initialize a DefaultHighlightReader for one segment: locate the highlight
// metadata (falling back to the legacy "term_vectors" key), validate its
// file format, and open the highlight.ix / highlight.dat instreams if the
// segment has highlight data.
DefaultHighlightReader*
DefHLReader_init(DefaultHighlightReader *self, Schema *schema, Folder *folder,
                 Snapshot *snapshot, Vector *segments, int32_t seg_tick) {
    HLReader_init((HighlightReader*)self, schema, folder, snapshot, segments,
                  seg_tick);
    DefaultHighlightReaderIVARS *const ivars = DefHLReader_IVARS(self);
    Segment *segment = DefHLReader_Get_Segment(self);
    Hash *metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "highlight", 9);
    if (!metadata) {
        // Legacy name for the same metadata.
        metadata = (Hash*)Seg_Fetch_Metadata_Utf8(segment, "term_vectors", 12);
    }

    // Check format.
    if (metadata) {
        Obj *format = Hash_Fetch_Utf8(metadata, "format", 6);
        if (!format) {
            THROW(ERR, "Missing 'format' var");
        }
        else {
            if (Json_obj_to_i64(format) != HLWriter_current_file_format) {
                THROW(ERR, "Unsupported highlight data format: %i64",
                      Json_obj_to_i64(format));
            }
        }
    }

    // Open instreams.
    String *seg_name = Seg_Get_Name(segment);
    String *ix_file = Str_newf("%o/highlight.ix", seg_name);
    String *dat_file = Str_newf("%o/highlight.dat", seg_name);
    if (Folder_Exists(folder, ix_file)) {
        ivars->ix_in = Folder_Open_In(folder, ix_file);
        if (!ivars->ix_in) {
            // Release local refs and self before rethrowing.
            Err *error = (Err*)INCREF(Err_get_error());
            DECREF(ix_file);
            DECREF(dat_file);
            DECREF(self);
            RETHROW(error);
        }
        ivars->dat_in = Folder_Open_In(folder, dat_file);
        if (!ivars->dat_in) {
            Err *error = (Err*)INCREF(Err_get_error());
            DECREF(ix_file);
            DECREF(dat_file);
            DECREF(self);
            RETHROW(error);
        }
    }
    DECREF(ix_file);
    DECREF(dat_file);
    return self;
}
int main(int argc, char *argv[]) { // Initialize the library. lucy_bootstrap_parcel(); if (argc != 2) { S_usage_and_exit(argv[0]); } const char *query_c = argv[1]; printf("Searching for: %s\n\n", query_c); String *folder = Str_newf("%s", path_to_index); String *language = Str_newf("en"); Simple *lucy = Simple_new((Obj*)folder, language); String *query_str = Str_newf("%s", query_c); Simple_Search(lucy, query_str, 0, 10); String *title_str = Str_newf("title"); String *url_str = Str_newf("url"); HitDoc *hit; int i = 1; // Loop over search results. while (NULL != (hit = Simple_Next(lucy))) { String *title = (String*)HitDoc_Extract(hit, title_str); char *title_c = Str_To_Utf8(title); String *url = (String*)HitDoc_Extract(hit, url_str); char *url_c = Str_To_Utf8(url); printf("Result %d: %s (%s)\n", i, title_c, url_c); free(url_c); free(title_c); DECREF(url); DECREF(title); DECREF(hit); i++; } DECREF(url_str); DECREF(title_str); DECREF(query_str); DECREF(lucy); DECREF(language); DECREF(folder); return 0; }
static Schema* S_create_schema() { // Create a new schema. Schema *schema = Schema_new(); // Create an analyzer. String *language = Str_newf("en"); EasyAnalyzer *analyzer = EasyAnalyzer_new(language); // Specify fields. { String *field_str = Str_newf("title"); FullTextType *type = FullTextType_new((Analyzer*)analyzer); Schema_Spec_Field(schema, field_str, (FieldType*)type); DECREF(type); DECREF(field_str); } { String *field_str = Str_newf("content"); FullTextType *type = FullTextType_new((Analyzer*)analyzer); FullTextType_Set_Highlightable(type, true); Schema_Spec_Field(schema, field_str, (FieldType*)type); DECREF(type); DECREF(field_str); } { String *field_str = Str_newf("url"); StringType *type = StringType_new(); StringType_Set_Indexed(type, false); Schema_Spec_Field(schema, field_str, (FieldType*)type); DECREF(type); DECREF(field_str); } { String *field_str = Str_newf("category"); StringType *type = StringType_new(); StringType_Set_Stored(type, false); Schema_Spec_Field(schema, field_str, (FieldType*)type); DECREF(type); DECREF(field_str); } DECREF(analyzer); DECREF(language); return schema; }
// Default stringification: "<class name>@0x<address>".
String*
Obj_To_String_IMP(Obj *self) {
#if (SIZEOF_PTR == 4)
    return Str_newf("%o@0x%x32", Obj_Get_Class_Name(self), self);
#elif (SIZEOF_PTR == 8)
    // The formatter only has a 32-bit hex specifier, so a 64-bit pointer is
    // printed as two 32-bit halves.
    int64_t iaddress = PTR_TO_I64(self);
    uint64_t address = (uint64_t)iaddress;
    uint32_t address_hi = address >> 32;
    uint32_t address_lo = address & 0xFFFFFFFF;
    return Str_newf("%o@0x%x32%x32", Obj_Get_Class_Name(self), address_hi,
                    address_lo);
#else
  #error "Unexpected pointer size."
#endif
}
// Flip the writer from write mode to read mode: either sort the in-memory
// buffer, or (when runs were spilled to disk) open the temporary sort
// streams and hand each run its share of the memory budget.
//
// Fix: the original set `ivars->flipped = true` twice -- once right after
// the double-flip check and again, redundantly, at the very end.  The
// trailing duplicate has been removed; the flag is set exactly once, before
// any work begins.
void
SortFieldWriter_Flip_IMP(SortFieldWriter *self) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    uint32_t num_items = SortFieldWriter_Buffer_Count(self);
    uint32_t num_runs  = Vec_Get_Size(ivars->runs);

    if (ivars->flipped) { THROW(ERR, "Can't call Flip() twice"); }
    ivars->flipped = true;

    // Sanity check: a writer that has spilled runs to disk must have already
    // flushed its in-memory buffer.
    if (num_runs && num_items) {
        THROW(ERR, "Sanity check failed: num_runs: %u32 num_items: %u32",
              num_runs, num_items);
    }

    if (num_items) {
        SortFieldWriter_Sort_Buffer(self);
    }
    else if (num_runs) {
        Folder *folder = PolyReader_Get_Folder(ivars->polyreader);
        String *seg_name = Seg_Get_Name(ivars->segment);

        // Ordinal stream.
        String *ord_path = Str_newf("%o/sort_ord_temp", seg_name);
        ivars->ord_in = Folder_Open_In(folder, ord_path);
        DECREF(ord_path);
        if (!ivars->ord_in) { RETHROW(INCREF(Err_get_error())); }

        // Variable-width fields also need the offset ("ix") stream.
        if (ivars->var_width) {
            String *ix_path = Str_newf("%o/sort_ix_temp", seg_name);
            ivars->ix_in = Folder_Open_In(folder, ix_path);
            DECREF(ix_path);
            if (!ivars->ix_in) { RETHROW(INCREF(Err_get_error())); }
        }

        // Data stream.
        String *dat_path = Str_newf("%o/sort_dat_temp", seg_name);
        ivars->dat_in = Folder_Open_In(folder, dat_path);
        DECREF(dat_path);
        if (!ivars->dat_in) { RETHROW(INCREF(Err_get_error())); }

        // Assign streams and a slice of mem_thresh, with a 64K floor.
        size_t sub_thresh = ivars->mem_thresh / num_runs;
        if (sub_thresh < 65536) { sub_thresh = 65536; }
        for (uint32_t i = 0; i < num_runs; i++) {
            SortFieldWriter *run = (SortFieldWriter*)Vec_Fetch(ivars->runs, i);
            S_flip_run(run, sub_thresh, ivars->ord_in, ivars->ix_in,
                       ivars->dat_in);
        }
    }
}
// Extend the superclass metadata with a "files" hash that records, for each
// segment whose deletions changed this session, the deletion count and the
// backing bit-vector filename.
Hash*
DefDelWriter_Metadata_IMP(DefaultDeletionsWriter *self) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    // Call the superclass Metadata() to get the base hash.
    DefDelWriter_Metadata_t super_meta
        = (DefDelWriter_Metadata_t)SUPER_METHOD_PTR(DEFAULTDELETIONSWRITER,
                                                    LUCY_DefDelWriter_Metadata);
    Hash *const metadata = super_meta(self);
    Hash *const files = Hash_new(0);

    for (uint32_t i = 0, max = VA_Get_Size(ivars->seg_readers); i < max; i++) {
        SegReader *seg_reader = (SegReader*)VA_Fetch(ivars->seg_readers, i);
        // Only record segments whose deletions were updated.
        if (ivars->updated[i]) {
            BitVector *deldocs = (BitVector*)VA_Fetch(ivars->bit_vecs, i);
            Segment *segment = SegReader_Get_Segment(seg_reader);
            Hash *mini_meta = Hash_new(2);
            Hash_Store_Utf8(mini_meta, "count", 5,
                            (Obj*)Str_newf("%u32",
                                           (uint32_t)BitVec_Count(deldocs)));
            Hash_Store_Utf8(mini_meta, "filename", 8,
                            (Obj*)S_del_filename(self, seg_reader));
            Hash_Store(files, (Obj*)Seg_Get_Name(segment), (Obj*)mini_meta);
        }
    }
    Hash_Store_Utf8(metadata, "files", 5, (Obj*)files);

    return metadata;
}
// Exercise the three String concatenation variants; each should produce
// "a<smiley>".
static void
test_Cat(TestBatchRunner *runner) {
    String *wanted = Str_newf("a%s", smiley);

    {
        String *source = S_get_str("");
        String *got    = Str_Cat(source, wanted);
        TEST_TRUE(runner, Str_Equals(wanted, (Obj*)got), "Cat");
        DECREF(source);
        DECREF(got);
    }
    {
        String *source = S_get_str("a");
        String *got    = Str_Cat_Utf8(source, smiley, smiley_len);
        TEST_TRUE(runner, Str_Equals(wanted, (Obj*)got), "Cat_Utf8");
        DECREF(source);
        DECREF(got);
    }
    {
        String *source = S_get_str("a");
        String *got    = Str_Cat_Trusted_Utf8(source, smiley, smiley_len);
        TEST_TRUE(runner, Str_Equals(wanted, (Obj*)got), "Cat_Trusted_Utf8");
        DECREF(source);
        DECREF(got);
    }

    DECREF(wanted);
}
// Exercise the four CharBuf concatenation variants; each should produce
// "a<smiley>".
static void
test_Cat(TestBatchRunner *runner) {
    String *wanted = Str_newf("a%s", smiley);

    CharBuf *buf = S_get_cb("");
    CB_Cat(buf, wanted);
    TEST_TRUE(runner, S_cb_equals(buf, wanted), "Cat");
    DECREF(buf);

    buf = S_get_cb("a");
    CB_Cat_Char(buf, 0x263A);
    TEST_TRUE(runner, S_cb_equals(buf, wanted), "Cat_Char");
    DECREF(buf);

    buf = S_get_cb("a");
    CB_Cat_Utf8(buf, smiley, smiley_len);
    TEST_TRUE(runner, S_cb_equals(buf, wanted), "Cat_Utf8");
    DECREF(buf);

    buf = S_get_cb("a");
    CB_Cat_Trusted_Utf8(buf, smiley, smiley_len);
    TEST_TRUE(runner, S_cb_equals(buf, wanted), "Cat_Trusted_Utf8");
    DECREF(buf);

    DECREF(wanted);
}
// Compose the deletions filename for a target segment:
// "<writer_seg>/deletions-<target_seg>.bv".
static String*
S_del_filename(DefaultDeletionsWriter *self, SegReader *target_reader) {
    DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self);
    Segment *target_seg = SegReader_Get_Segment(target_reader);
    String *writer_seg_name = Seg_Get_Name(ivars->segment);
    String *target_seg_name = Seg_Get_Name(target_seg);
    return Str_newf("%o/deletions-%o.bv", writer_seg_name, target_seg_name);
}
// Create a test data structure including at least one each of Hash, Vector, // and String. static Obj* S_make_dump() { Hash *dump = Hash_new(0); Hash_Store_Utf8(dump, "foo", 3, (Obj*)Str_newf("foo")); Hash_Store_Utf8(dump, "stuff", 5, (Obj*)Vec_new(0)); return (Obj*)dump; }
// Create the shared true/false singletons, safely under concurrent calls:
// each candidate is installed via compare-and-swap and destroyed if another
// thread's candidate won the race.
void
Bool_init_class() {
    Boolean *true_obj = (Boolean*)Class_Make_Obj(BOOLEAN);
    true_obj->value = true;
    true_obj->string = Str_newf("true");
    if (!Atomic_cas_ptr((void**)&Bool_true_singleton, NULL, true_obj)) {
        // Another thread installed its singleton first; discard ours.
        Bool_Destroy(true_obj);
    }
    Boolean *false_obj = (Boolean*)Class_Make_Obj(BOOLEAN);
    false_obj->value = false;
    false_obj->string = Str_newf("false");
    if (!Atomic_cas_ptr((void**)&Bool_false_singleton, NULL, false_obj)) {
        Bool_Destroy(false_obj);
    }
}
// Generate the filename for the next snapshot: "snapshot_<gen>.json", where
// <gen> is one greater (rendered in base 36) than the highest generation
// found among existing "snapshot_*.json" entries in the index folder.
//
// Fix: the original passed `&base36` to the `%s` specifier.  That argument
// has type char(*)[N]; consuming it as char* through varargs is undefined
// behavior (same address in practice, wrong type formally).  Pass the array
// itself so it decays to char*.
String*
IxManager_Make_Snapshot_Filename_IMP(IndexManager *self) {
    IndexManagerIVARS *const ivars = IxManager_IVARS(self);
    Folder *folder = (Folder*)CERTIFY(ivars->folder, FOLDER);
    DirHandle *dh = Folder_Open_Dir(folder, NULL);
    uint64_t max_gen = 0;

    if (!dh) { RETHROW(INCREF(Err_get_error())); }

    // Scan the directory, tracking the highest snapshot generation seen.
    while (DH_Next(dh)) {
        String *entry = DH_Get_Entry(dh);
        if (Str_Starts_With_Utf8(entry, "snapshot_", 9)
            && Str_Ends_With_Utf8(entry, ".json", 5)
           ) {
            uint64_t gen = IxFileNames_extract_gen(entry);
            if (gen > max_gen) { max_gen = gen; }
        }
        DECREF(entry);
    }
    DECREF(dh);

    uint64_t new_gen = max_gen + 1;
    char base36[StrHelp_MAX_BASE36_BYTES];
    StrHelp_to_base36(new_gen, &base36);
    return Str_newf("snapshot_%s.json", base36);
}
// Build a stoplist hash for `language` (two-letter ISO code), or return
// NULL if no Snowball stoplist exists for that language.  Keys are the
// stopwords; values are empty strings.
Hash*
SnowStop_gen_stoplist(String *language) {
    char lang[2];
    // Lowercase the first two code points of the language name.
    // NOTE(review): passing code points outside unsigned-char range to
    // tolower() is undefined behavior -- confirm callers only supply ASCII
    // ISO codes.
    lang[0] = tolower(Str_Code_Point_At(language, 0));
    lang[1] = tolower(Str_Code_Point_At(language, 1));
    // Select the static, NULL-terminated stopword table for the language.
    const uint8_t **words = NULL;
    if (memcmp(lang, "da", 2) == 0)      { words = SnowStop_snow_da; }
    else if (memcmp(lang, "de", 2) == 0) { words = SnowStop_snow_de; }
    else if (memcmp(lang, "en", 2) == 0) { words = SnowStop_snow_en; }
    else if (memcmp(lang, "es", 2) == 0) { words = SnowStop_snow_es; }
    else if (memcmp(lang, "fi", 2) == 0) { words = SnowStop_snow_fi; }
    else if (memcmp(lang, "fr", 2) == 0) { words = SnowStop_snow_fr; }
    else if (memcmp(lang, "hu", 2) == 0) { words = SnowStop_snow_hu; }
    else if (memcmp(lang, "it", 2) == 0) { words = SnowStop_snow_it; }
    else if (memcmp(lang, "nl", 2) == 0) { words = SnowStop_snow_nl; }
    else if (memcmp(lang, "no", 2) == 0) { words = SnowStop_snow_no; }
    else if (memcmp(lang, "pt", 2) == 0) { words = SnowStop_snow_pt; }
    else if (memcmp(lang, "ru", 2) == 0) { words = SnowStop_snow_ru; }
    else if (memcmp(lang, "sv", 2) == 0) { words = SnowStop_snow_sv; }
    else { return NULL; }
    // Count the entries so the hash can be sized up front.
    size_t num_stopwords = 0;
    for (uint32_t i = 0; words[i] != NULL; i++) { num_stopwords++; }
    // Wrap each static word (no copy) and store it as a key with an empty
    // string as a placeholder value.
    NoCloneHash *stoplist = NoCloneHash_new(num_stopwords);
    for (uint32_t i = 0; words[i] != NULL; i++) {
        char *word = (char*)words[i];
        String *stop = Str_new_wrap_trusted_utf8(word, strlen(word));
        NoCloneHash_Store(stoplist, (Obj*)stop, (Obj*)Str_newf(""));
        DECREF(stop);
    }
    return (Hash*)stoplist;
}
// Round-trip a Segment through Write_File/Read_File and verify that the doc
// count, field numbering, and metadata all survive.
static void
test_Write_File_and_Read_File(TestBatchRunner *runner) {
    RAMFolder *folder  = RAMFolder_new(NULL);
    Segment   *segment = Seg_new(100);
    Segment   *got     = Seg_new(100);
    String    *flotsam = (String*)SSTR_WRAP_UTF8("flotsam", 7);
    String    *jetsam  = (String*)SSTR_WRAP_UTF8("jetsam", 6);

    // Populate the segment.
    Seg_Set_Count(segment, 111);
    Seg_Store_Metadata_Utf8(segment, "foo", 3, (Obj*)Str_newf("bar"));
    Seg_Add_Field(segment, flotsam);
    Seg_Add_Field(segment, jetsam);

    // Write it out, then read it back into a fresh Segment.
    RAMFolder_MkDir(folder, Seg_Get_Name(segment));
    Seg_Write_File(segment, (Folder*)folder);
    Seg_Read_File(got, (Folder*)folder);

    TEST_TRUE(runner, Seg_Get_Count(got) == Seg_Get_Count(segment),
              "Round-trip count through file");
    TEST_TRUE(runner,
              Seg_Field_Num(got, jetsam) == Seg_Field_Num(segment, jetsam),
              "Round trip field names through file");
    String *meta = (String*)Seg_Fetch_Metadata_Utf8(got, "foo", 3);
    TEST_TRUE(runner,
              meta
              && Str_Is_A(meta, STRING)
              && Str_Equals_Utf8(meta, "bar", 3),
              "Round trip metadata through file");

    DECREF(got);
    DECREF(segment);
    DECREF(folder);
}
// Run a query against the index and print the title of each hit.
static void
S_search(IndexSearcher *searcher, const char *query) {
    printf("Searching for: %s\n", query);

    // Execute the search.
    String *query_str = Str_new_from_utf8(query, strlen(query));
    Hits   *hits      = IxSearcher_Hits(searcher, (Obj*)query_str, 0, 10,
                                        NULL);
    String *field_str = Str_newf("title");

    // Print one line per result.
    int result_num = 1;
    for (HitDoc *hit = Hits_Next(hits); hit != NULL; hit = Hits_Next(hits)) {
        String *value_str = (String*)HitDoc_Extract(hit, field_str);
        char *value = Str_To_Utf8(value_str);
        printf("Result %d: %s\n", result_num, value);
        free(value);
        DECREF(value_str);
        DECREF(hit);
        result_num++;
    }
    printf("\n");

    DECREF(field_str);
    DECREF(hits);
    DECREF(query_str);
}
// Exercise Code_Point_At / Code_Point_From over a mixed ASCII/multibyte
// string, including out-of-bounds positions at both ends.
static void
test_Code_Point_At_and_From(TestBatchRunner *runner) {
    int32_t expected[] = { 'a', smiley_cp, smiley_cp, 'b', smiley_cp, 'c' };
    uint32_t count = sizeof(expected) / sizeof(int32_t);
    String *string = Str_newf("a%s%sb%sc", smiley, smiley, smiley);

    for (uint32_t i = 0; i < count; i++) {
        uint32_t from = count - i;
        TEST_INT_EQ(runner, Str_Code_Point_At(string, i), expected[i],
                    "Code_Point_At %ld", (long)i);
        TEST_INT_EQ(runner, Str_Code_Point_From(string, from), expected[i],
                    "Code_Point_From %ld", (long)from);
    }

    // Positions past either end yield the out-of-bounds sentinel.
    TEST_INT_EQ(runner, Str_Code_Point_At(string, count), STR_OOB,
                "Code_Point_At %ld", (long)count);
    TEST_INT_EQ(runner, Str_Code_Point_From(string, 0), STR_OOB,
                "Code_Point_From 0");
    TEST_INT_EQ(runner, Str_Code_Point_From(string, count + 1), STR_OOB,
                "Code_Point_From %ld", (long)(count + 1));

    DECREF(string);
}
// Produce this writer's segment metadata: a hash holding only the file
// format number under "format".
Hash*
DataWriter_Metadata_IMP(DataWriter *self) {
    Hash *metadata = Hash_new(0);
    Obj *format = (Obj*)Str_newf("%i32", DataWriter_Format(self));
    Hash_Store_Utf8(metadata, "format", 6, format);
    return metadata;
}
// Verify SnowballStemmer output against the fixtures in tests.json, which
// maps each ISO language code to parallel "words" and "stems" arrays.
static void
test_stemming(TestBatchRunner *runner) {
    FSFolder *modules_folder = TestUtils_modules_folder();
    String *path = Str_newf("analysis/snowstem/source/test/tests.json");
    Hash *tests = (Hash*)Json_slurp_json((Folder*)modules_folder, path);
    if (!tests) { RETHROW(Err_get_error()); }

    String *iso;
    Hash *lang_data;
    Hash_Iterate(tests);
    // One stemmer per language entry.
    while (Hash_Next(tests, (Obj**)&iso, (Obj**)&lang_data)) {
        VArray *words = (VArray*)Hash_Fetch_Utf8(lang_data, "words", 5);
        VArray *stems = (VArray*)Hash_Fetch_Utf8(lang_data, "stems", 5);
        SnowballStemmer *stemmer = SnowStemmer_new(iso);
        for (uint32_t i = 0, max = VA_Get_Size(words); i < max; i++) {
            String *word = (String*)VA_Fetch(words, i);
            // Split() on a single token yields the stemmed form at index 0.
            VArray *got = SnowStemmer_Split(stemmer, word);
            String *stem = (String*)VA_Fetch(got, 0);
            TEST_TRUE(runner,
                      stem
                      && Str_Is_A(stem, STRING)
                      && Str_Equals(stem, VA_Fetch(stems, i)),
                      "Stem %s: %s", Str_Get_Ptr8(iso), Str_Get_Ptr8(word)
                     );
            DECREF(got);
        }
        DECREF(stemmer);
    }

    DECREF(tests);
    DECREF(modules_folder);
    DECREF(path);
}
// Verify that To_ByteBuf yields the raw UTF-8 bytes of the string.
static void
test_To_ByteBuf(TestBatchRunner *runner) {
    String  *source = Str_newf("foo");
    ByteBuf *bytes  = Str_To_ByteBuf(source);
    TEST_TRUE(runner, BB_Equals_Bytes(bytes, "foo", 3), "To_ByteBuf");
    DECREF(source);
    DECREF(bytes);
}