/* Check every stemmer against the word/stem pairs in the snowstem
 * "tests.json" fixture.
 *
 * The fixture is read as a Hash.  Each key is handed to SnowStemmer_new() and
 * each value is a Hash holding parallel "words" and "stems" arrays; one test
 * is emitted per word.
 */
static void
test_stemming(TestBatchRunner *runner) {
    FSFolder *modules_folder = TestUtils_modules_folder();
    String *path = Str_newf("analysis/snowstem/source/test/tests.json");
    Hash *tests = (Hash*)Json_slurp_json((Folder*)modules_folder, path);
    if (!tests) {
        // Hand RETHROW its own reference to the pending error, following the
        // RETHROW(INCREF(Err_get_error())) idiom.
        RETHROW(INCREF(Err_get_error()));
    }

    String *iso;
    Hash *lang_data;
    Hash_Iterate(tests);
    while (Hash_Next(tests, (Obj**)&iso, (Obj**)&lang_data)) {
        VArray *words = (VArray*)Hash_Fetch_Utf8(lang_data, "words", 5);
        VArray *stems = (VArray*)Hash_Fetch_Utf8(lang_data, "stems", 5);
        SnowballStemmer *stemmer = SnowStemmer_new(iso);

        for (uint32_t i = 0, max = VA_Get_Size(words); i < max; i++) {
            String *word = (String*)VA_Fetch(words, i);
            VArray *got  = SnowStemmer_Split(stemmer, word);
            // Only the first element of the result is compared.
            String *stem = (String*)VA_Fetch(got, 0);
            TEST_TRUE(runner,
                      stem
                      && Str_Is_A(stem, STRING)
                      && Str_Equals(stem, VA_Fetch(stems, i)),
                      "Stem %s: %s", Str_Get_Ptr8(iso), Str_Get_Ptr8(word)
                     );
            DECREF(got);
        }

        DECREF(stemmer);
    }

    DECREF(tests);
    DECREF(modules_folder);
    DECREF(path);
}
/* Exercise FSFH_Write(), FSFH_Read() and FSFH_Length() on a scratch file
 * named "_fstest", covering both the success paths and the error paths
 * (reading a write-only handle, negative offset, read past EOF, writing a
 * read-only handle).
 */
static void
test_Read_Write(TestBatchRunner *runner) {
    FSFileHandle *fh;
    const char *foo = "foo";
    const char *bar = "bar";
    char buffer[12];
    char *buf = buffer;
    String *test_filename = (String*)SSTR_WRAP_UTF8("_fstest", 7);

    // Start from a clean slate, then create the file for writing.
    remove(Str_Get_Ptr8(test_filename));
    fh = FSFH_open(test_filename,
                   FH_CREATE | FH_WRITE_ONLY | FH_EXCLUSIVE);
    if (!fh) {
        // Without a handle every call below would dereference NULL.
        RETHROW(INCREF(Err_get_error()));
    }

    TEST_TRUE(runner, FSFH_Length(fh) == INT64_C(0), "Length initially 0");
    TEST_TRUE(runner, FSFH_Write(fh, foo, 3), "Write returns success");
    TEST_TRUE(runner, FSFH_Length(fh) == INT64_C(3), "Length after Write");
    TEST_TRUE(runner, FSFH_Write(fh, bar, 3), "Write returns success");
    TEST_TRUE(runner, FSFH_Length(fh) == INT64_C(6), "Length after 2 Writes");

    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Read(fh, buf, 0, 2),
               "Reading from a write-only handle returns false");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Reading from a write-only handle sets error");
    if (!FSFH_Close(fh)) {
        RETHROW(INCREF(Err_get_error()));
    }
    DECREF(fh);

    // Reopen for reading.
    Err_set_error(NULL);
    fh = FSFH_open(test_filename, FH_READ_ONLY);
    if (!fh) {
        RETHROW(INCREF(Err_get_error()));
    }

    TEST_TRUE(runner, FSFH_Length(fh) == INT64_C(6), "Length on Read");
    TEST_TRUE(runner, FSFH_Read(fh, buf, 0, 6), "Read returns success");
    TEST_TRUE(runner, strncmp(buf, "foobar", 6) == 0, "Read/Write");
    TEST_TRUE(runner, FSFH_Read(fh, buf, 2, 3), "Read returns success");
    TEST_TRUE(runner, strncmp(buf, "oba", 3) == 0, "Read with offset");

    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Read(fh, buf, -1, 4),
               "Read() with a negative offset returns false");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Read() with a negative offset sets error");

    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Read(fh, buf, 6, 1),
               "Read() past EOF returns false");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Read() past EOF sets error");

    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Write(fh, foo, 3),
               "Writing to a read-only handle returns false");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Writing to a read-only handle sets error");

    DECREF(fh);
    remove(Str_Get_Ptr8(test_filename));
}
/* Record the host-language alias of a Method.
 *
 * Throws if an alias has already been set.  Otherwise a private copy of
 * `name` is stored in `host_alias_internal`, and `host_alias` is created with
 * Str_new_wrap_trusted_utf8() over that copy's buffer.
 */
void
Method_Set_Host_Alias_IMP(Method *self, String *name) {
    if (self->host_alias != NULL) {
        THROW(ERR, "Can't Set_Host_Alias more than once");
    }

    // Copy the caller's bytes first...
    String *owned_copy
        = Str_new_from_trusted_utf8(Str_Get_Ptr8(name), Str_Get_Size(name));
    self->host_alias_internal = owned_copy;

    // ...then wrap the copy, never the caller's buffer.
    const char *copy_utf8 = Str_Get_Ptr8(self->host_alias_internal);
    size_t      copy_size = Str_Get_Size(self->host_alias_internal);
    self->host_alias = Str_new_wrap_trusted_utf8(copy_utf8, copy_size);
}
/* Check that utf8proc normalization is stable: normalizing 100 random
 * strings twice must yield the same bytes as normalizing them once.
 *
 * The test is currently disabled: it reports one SKIP and returns before
 * doing any work, so everything after the early return is not executed.
 */
static void
test_utf8proc_normalization(TestBatchRunner *runner) {
    SKIP(runner, 1,
         "utf8proc can't handle control chars or Unicode non-chars");
    return;

    for (int32_t round = 0; round < 100; round++) {
        String *random_text = TestUtils_random_string(rand() % 40);

        // First normalization pass.
        uint8_t *first_pass;
        int32_t first_status
            = utf8proc_map((const uint8_t*)Str_Get_Ptr8(random_text),
                           Str_Get_Size(random_text),
                           &first_pass,
                           UTF8PROC_STABLE  |
                           UTF8PROC_COMPOSE |
                           UTF8PROC_COMPAT  |
                           UTF8PROC_CASEFOLD);
        if (first_status < 0) {
            // Report the offending input as JSON so that it is printable.
            lucy_Json_set_tolerant(1);
            String *as_json = lucy_Json_to_json((Obj*)random_text);
            if (as_json == NULL) {
                as_json = Str_newf("[failed to encode]");
            }
            FAIL(runner, "Failed to normalize: %s", Str_Get_Ptr8(as_json));
            DECREF(as_json);
            DECREF(random_text);
            return;
        }

        // Second pass over the output of the first.
        size_t first_len = strlen((char*)first_pass);
        uint8_t *second_pass;
        int32_t second_status
            = utf8proc_map(first_pass, first_len, &second_pass,
                           UTF8PROC_STABLE  |
                           UTF8PROC_COMPOSE |
                           UTF8PROC_COMPAT  |
                           UTF8PROC_CASEFOLD);
        if (second_status < 0) {
            THROW(ERR, "Unexpected normalization error: %i32", second_status);
        }

        int difference = strcmp((char*)first_pass, (char*)second_pass);
        free(second_pass);
        free(first_pass);
        DECREF(random_text);
        if (difference != 0) {
            FAIL(runner, "Not fully normalized");
            return;
        }
    }

    PASS(runner, "Normalization successful.");
}
/* Exercise FSFH_Window() and FSFH_Release_Window() against a scratch file
 * "_fstest" filled with 1024 repetitions of "foo ".
 *
 * Covers a negative offset, a window past EOF, a valid window, and the
 * resetting of the FileWindow's buf/offset/len on release.
 */
static void
test_Window(TestBatchRunner *runner) {
    String *test_filename = (String*)SSTR_WRAP_UTF8("_fstest", 7);
    FSFileHandle *fh;
    FileWindow *window = FileWindow_new();
    FileWindowIVARS *const window_ivars = FileWindow_IVARS(window);
    uint32_t i;

    // Build the fixture file.
    remove(Str_Get_Ptr8(test_filename));
    fh = FSFH_open(test_filename,
                   FH_CREATE | FH_WRITE_ONLY | FH_EXCLUSIVE);
    if (!fh) {
        // Same guard as the read-only reopen below.
        RETHROW(INCREF(Err_get_error()));
    }
    for (i = 0; i < 1024; i++) {
        FSFH_Write(fh, "foo ", 4);
    }
    if (!FSFH_Close(fh)) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Reopen for reading.
    DECREF(fh);
    fh = FSFH_open(test_filename, FH_READ_ONLY);
    if (!fh) {
        RETHROW(INCREF(Err_get_error()));
    }

    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Window(fh, window, -1, 4),
               "Window() with a negative offset returns false");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Window() with a negative offset sets error");

    Err_set_error(NULL);
    TEST_FALSE(runner, FSFH_Window(fh, window, 4000, 1000),
               "Window() past EOF returns false");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Window() past EOF sets error");

    TEST_TRUE(runner, FSFH_Window(fh, window, 1021, 2),
              "Window() returns true");
    // Translate file offset 1021 into an address inside the window's buffer.
    TEST_TRUE(runner,
              strncmp(window_ivars->buf - window_ivars->offset + 1021,
                      "oo", 2) == 0,
              "Window()");

    TEST_TRUE(runner, FSFH_Release_Window(fh, window),
              "Release_Window() returns true");
    TEST_TRUE(runner, window_ivars->buf == NULL,
              "Release_Window() resets buf");
    TEST_TRUE(runner, window_ivars->offset == 0,
              "Release_Window() resets offset");
    TEST_TRUE(runner, window_ivars->len == 0,
              "Release_Window() resets len");

    DECREF(window);
    DECREF(fh);
    remove(Str_Get_Ptr8(test_filename));
}
/* Tokenize `text` into a freshly created Inversion and return it.
 *
 * The Inversion starts out with no seed token; RegexTokenizer_Tokenize_Utf8()
 * is given the string's bytes and byte count along with the Inversion.
 */
Inversion*
RegexTokenizer_Transform_Text_IMP(RegexTokenizer *self, String *text) {
    Inversion *result = Inversion_new(NULL);
    const char *utf8 = Str_Get_Ptr8(text);
    size_t num_bytes = Str_Get_Size(text);
    RegexTokenizer_Tokenize_Utf8(self, utf8, num_bytes, result);
    return result;
}
/* Serialize a String to `outstream`: the byte count is written with
 * OutStream_Write_C64(), followed by the string's raw bytes.
 */
void
Freezer_serialize_string(String *string, OutStream *outstream) {
    const size_t num_bytes = Str_Get_Size(string);
    const char *utf8 = Str_Get_Ptr8(string);

    // Length prefix first, payload second.
    OutStream_Write_C64(outstream, num_bytes);
    OutStream_Write_Bytes(outstream, utf8, num_bytes);
}
/* Add a field entry to the Inverter, building its Inversion first.
 *
 * - If the entry has an analyzer, the field value is run through it.
 * - Otherwise, if the entry is indexed or highlightable, the whole value
 *   becomes a single seed token.
 * - Otherwise no Inversion is built.
 *
 * In every case the entry is pushed onto `entries` (with a new reference) and
 * the Inverter is marked unsorted.
 */
void
Inverter_Add_Field_IMP(Inverter *self, InverterEntry *entry) {
    InverterIVARS *const ivars = Inverter_IVARS(self);
    InverterEntryIVARS *const entry_ivars = InvEntry_IVARS(entry);
    String *const field_value = (String*)entry_ivars->value;

    if (entry_ivars->analyzer) {
        // Analyzed field: let the analyzer produce the Inversion.
        DECREF(entry_ivars->inversion);
        entry_ivars->inversion
            = Analyzer_Transform_Text(entry_ivars->analyzer, field_value);
        Inversion_Invert(entry_ivars->inversion);
    }
    else if (entry_ivars->indexed || entry_ivars->highlightable) {
        // Unanalyzed field: a single token spanning the whole value.
        size_t value_size = Str_Get_Size(field_value);
        Token *whole_value = Token_new(Str_Get_Ptr8(field_value), value_size,
                                       0, value_size, 1.0f, 1);
        DECREF(entry_ivars->inversion);
        entry_ivars->inversion = Inversion_new(whole_value);
        DECREF(whole_value);
        Inversion_Invert(entry_ivars->inversion); // Nearly a no-op.
    }

    // Record the entry; the sort order is now stale.
    VA_Push(ivars->entries, INCREF(entry));
    ivars->sorted = false;
}
/* Callback taking an opaque `context` that is really a
 * `struct lockfile_context*`.  Writes the context's JSON string to its
 * OutStream and then closes the stream.
 */
static void
S_write_lockfile_json(void *context) {
    struct lockfile_context *ctx = (struct lockfile_context*)context;
    const size_t json_size = Str_Get_Size(ctx->json);
    const char *json_utf8 = Str_Get_Ptr8(ctx->json);

    OutStream_Write_Bytes(ctx->outstream, json_utf8, json_size);
    OutStream_Close(ctx->outstream);
}
/* Run `text` through the PolyAnalyzer's chain of analyzers and return the
 * resulting Inversion.
 *
 * With an empty chain, the whole text becomes a single seed token.
 * Otherwise the first analyzer transforms the text and each later analyzer
 * transforms the previous analyzer's Inversion.
 */
Inversion*
PolyAnalyzer_Transform_Text_IMP(PolyAnalyzer *self, String *text) {
    VArray *const chain = PolyAnalyzer_IVARS(self)->analyzers;
    const uint32_t chain_len = VA_Get_Size(chain);

    if (chain_len == 0) {
        // No analyzers: wrap the entire text in one token.
        size_t text_size = Str_Get_Size(text);
        const char *text_utf8 = Str_Get_Ptr8(text);
        Token *whole_text
            = Token_new(text_utf8, text_size, 0, text_size, 1.0f, 1);
        Inversion *single = Inversion_new(whole_text);
        DECREF(whole_text);
        return single;
    }

    Analyzer *head = (Analyzer*)VA_Fetch(chain, 0);
    Inversion *current = Analyzer_Transform_Text(head, text);
    for (uint32_t tick = 1; tick < chain_len; tick++) {
        Analyzer *next_analyzer = (Analyzer*)VA_Fetch(chain, tick);
        Inversion *transformed = Analyzer_Transform(next_analyzer, current);
        // The intermediate Inversion is no longer needed.
        DECREF(current);
        current = transformed;
    }
    return current;
}
/* Tokenize `text` into a freshly created Inversion and return it.
 *
 * The Inversion starts out with no seed token;
 * WhitespaceTokenizer_Tokenize_Str() receives the string's bytes (cast to a
 * non-const `char*`) and its byte count.
 */
Inversion*
WhitespaceTokenizer_Transform_Text_IMP(WhitespaceTokenizer *self,
                                       String *text) {
    Inversion *result = Inversion_new(NULL);
    char *utf8 = (char*)Str_Get_Ptr8(text);
    size_t num_bytes = Str_Get_Size(text);
    WhitespaceTokenizer_Tokenize_Str(self, utf8, num_bytes, result);
    return result;
}
/* Check Normalizer output against the utf8proc "tests.json" fixture.
 *
 * The fixture is read as a Vector of Hashes.  Each Hash supplies the
 * Normalizer settings ("normalization_form", "case_fold", "strip_accents")
 * and parallel "words" / "norms" arrays; one test is emitted per word.
 * If the test data folder cannot be located, 13 tests are skipped.
 */
static void
test_normalization(TestBatchRunner *runner) {
    FSFolder *modules_folder = TestUtils_modules_folder();
    if (modules_folder == NULL) {
        SKIP(runner, 13, "Can't locate test data");
        return;
    }

    String *path = Str_newf("unicode/utf8proc/tests.json");
    Vector *tests = (Vector*)Json_slurp_json((Folder*)modules_folder, path);
    if (!tests) {
        // Hand RETHROW its own reference to the pending error, following the
        // RETHROW(INCREF(Err_get_error())) idiom.
        RETHROW(INCREF(Err_get_error()));
    }

    for (uint32_t i = 0, max = Vec_Get_Size(tests); i < max; i++) {
        Hash *test = (Hash*)Vec_Fetch(tests, i);
        String *form = (String*)Hash_Fetch_Utf8(
                            test, "normalization_form", 18);
        bool case_fold = Bool_Get_Value((Boolean*)Hash_Fetch_Utf8(
                                              test, "case_fold", 9));
        bool strip_accents = Bool_Get_Value((Boolean*)Hash_Fetch_Utf8(
                                                  test, "strip_accents", 13));
        Normalizer *normalizer
            = Normalizer_new(form, case_fold, strip_accents);
        Vector *words = (Vector*)Hash_Fetch_Utf8(test, "words", 5);
        Vector *norms = (Vector*)Hash_Fetch_Utf8(test, "norms", 5);

        for (uint32_t j = 0, num_words = Vec_Get_Size(words);
             j < num_words;
             j++
            ) {
            String *word = (String*)Vec_Fetch(words, j);
            Vector *got  = Normalizer_Split(normalizer, word);
            // Only the first element of the result is compared.
            String *norm = (String*)Vec_Fetch(got, 0);
            TEST_TRUE(runner,
                      norm
                      && Str_is_a(norm, STRING)
                      && Str_Equals(norm, Vec_Fetch(norms, j)),
                      "Normalize %s %d %d: %s", Str_Get_Ptr8(form),
                      case_fold, strip_accents, Str_Get_Ptr8(word)
                     );
            DECREF(got);
        }

        DECREF(normalizer);
    }

    DECREF(tests);
    DECREF(modules_folder);
    DECREF(path);
}
/* Serialize a String to `outstream`: the byte count is written with
 * OutStream_Write_CU64(), followed by the string's raw bytes.
 *
 * Throws if the string is larger than INT32_MAX bytes.
 */
void
Freezer_serialize_string(String *string, OutStream *outstream) {
    const size_t num_bytes = Str_Get_Size(string);
    const char *utf8 = Str_Get_Ptr8(string);

    // Refuse oversized strings before anything is written.
    if (num_bytes > INT32_MAX) {
        THROW(ERR, "Can't serialize string above 2GB: %u64",
              (uint64_t)num_bytes);
    }

    OutStream_Write_CU64(outstream, num_bytes);
    OutStream_Write_Bytes(outstream, utf8, num_bytes);
}
/* Set the name of a Class from a UTF-8 buffer.
 *
 * `name_internal` owns a copy of the bytes.  `name` is a "wrapped" String
 * over that copy, which is effectively threadsafe: the only reference is
 * owned by an immortal object, and any INCREF spawns a copy.
 */
static void
S_set_name(Class *self, const char *utf8, size_t size) {
    String *owned_copy = Str_new_from_trusted_utf8(utf8, size);
    self->name_internal = owned_copy;

    const char *copy_utf8 = Str_Get_Ptr8(self->name_internal);
    size_t      copy_size = Str_Get_Size(self->name_internal);
    self->name = Str_new_wrap_trusted_utf8(copy_utf8, copy_size);
}
/* Initialize a Method with its name, callback and vtable offset, and return
 * `self`.
 *
 * The `name` member exposed through the `Get_Name` accessor is a "wrapped"
 * String over a private copy (`name_internal`).  That is effectively
 * threadsafe: an INCREF results in a copy, and the only reference is owned by
 * an immortal object.  `host_alias` starts out NULL and `is_excluded` false.
 */
Method*
Method_init(Method *self, String *name, cfish_method_t callback_func,
            uint32_t offset) {
    // Private copy of the caller's name...
    const char *src_utf8 = Str_Get_Ptr8(name);
    size_t      src_size = Str_Get_Size(name);
    self->name_internal = Str_new_from_trusted_utf8(src_utf8, src_size);

    // ...and the wrapper that is handed out to callers.
    const char *copy_utf8 = Str_Get_Ptr8(self->name_internal);
    size_t      copy_size = Str_Get_Size(self->name_internal);
    self->name = Str_new_wrap_trusted_utf8(copy_utf8, copy_size);

    self->host_alias    = NULL;
    self->callback_func = callback_func;
    self->offset        = offset;
    self->is_excluded   = false;

    return self;
}
/* Check that Str_Get_Ptr8() and Str_Get_Size() report the content and byte
 * length of the string "Banana".
 */
static void
test_Get_Ptr8(TestBatchRunner *runner) {
    String *banana = S_get_str("Banana");

    const char *bytes = Str_Get_Ptr8(banana);
    int difference = strcmp(bytes, "Banana");
    TEST_TRUE(runner, difference == 0, "Get_Ptr8");

    size_t num_bytes = Str_Get_Size(banana);
    TEST_INT_EQ(runner, num_bytes, 6, "Get_Size");

    DECREF(banana);
}
/* Build "<folder path><DIR_SEP><path>" as a newly allocated, NUL-terminated
 * C string.
 *
 * When the directory separator is not '/', every '/' in the result is
 * replaced with the separator.  The buffer comes from MALLOCATE() and is
 * returned to the caller.
 */
static char*
S_fullpath_ptr(FSFolder *self, String *path) {
    FSFolderIVARS *const ivars = FSFolder_IVARS(self);
    const char *dir_utf8 = Str_Get_Ptr8(ivars->path);
    const char *rel_utf8 = Str_Get_Ptr8(path);
    const size_t dir_len   = Str_Get_Size(ivars->path);
    const size_t rel_len   = Str_Get_Size(path);
    const size_t total_len = dir_len + 1 + rel_len;

    // One extra byte for the terminating NUL.
    char *full = (char*)MALLOCATE(total_len + 1);
    char *cursor = full;

    memcpy(cursor, dir_utf8, dir_len);
    cursor += dir_len;
    *cursor++ = DIR_SEP[0];
    memcpy(cursor, rel_utf8, rel_len);
    full[total_len] = '\0';

    if (DIR_SEP[0] != '/') {
        // Convert forward slashes to the native separator.
        for (char *p = full; p < full + total_len; ++p) {
            if (*p == '/') {
                *p = DIR_SEP[0];
            }
        }
    }

    return full;
}
/* Exercise FSFH_Close() on a scratch file "_fstest": closing a write-only
 * handle, a simulated failing close, and closing a read-only handle.
 *
 * Under _MSC_VER the failing-close tests are skipped (LUCY-155).
 */
static void
test_Close(TestBatchRunner *runner) {
    String *test_filename = (String*)SSTR_WRAP_UTF8("_fstest", 7);
    FSFileHandle *fh;

    // Plain close of a freshly created write-only handle.
    remove(Str_Get_Ptr8(test_filename));
    fh = FSFH_open(test_filename,
                   FH_CREATE | FH_WRITE_ONLY | FH_EXCLUSIVE);
    TEST_TRUE(runner, FSFH_Close(fh), "Close returns true for write-only");
    DECREF(fh);

    // Simulate an OS error when closing the file descriptor.  This
    // approximates what would happen if, say, we run out of disk space.
    remove(Str_Get_Ptr8(test_filename));
    fh = FSFH_open(test_filename,
                   FH_CREATE | FH_WRITE_ONLY | FH_EXCLUSIVE);
#ifdef _MSC_VER
    SKIP(runner, "LUCY-155");
    SKIP(runner, "LUCY-155");
#else
    // Swap in an invalid descriptor for the duration of the Close() call.
    int saved_fd = FSFH_IVARS(fh)->fd;
    FSFH_IVARS(fh)->fd = -1;
    Err_set_error(NULL);
    bool result = FSFH_Close(fh);
    TEST_FALSE(runner, result, "Failed Close() returns false");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "Failed Close() sets Err_error");
    // Put the real descriptor back before the handle is released.
    FSFH_IVARS(fh)->fd = saved_fd;
#endif /* _MSC_VER */
    DECREF(fh);

    // Closing a read-only handle.
    fh = FSFH_open(test_filename, FH_READ_ONLY);
    TEST_TRUE(runner, FSFH_Close(fh), "Close returns true for read-only");

    DECREF(fh);
    remove(Str_Get_Ptr8(test_filename));
}
/* Read one posting from `instream` and return it as a RawPosting allocated
 * from `mem_pool`.
 *
 * The first C32 packs the doc id delta in its upper bits; the low bit, when
 * set, means the frequency is 1, otherwise the frequency follows as another
 * C32.  `self` is unused.
 */
RawPosting*
MatchPost_Read_Raw_IMP(MatchPosting *self, InStream *instream,
                       int32_t last_doc_id, String *term_text,
                       MemoryPool *mem_pool) {
    UNUSED_VAR(self);

    const char *const text_buf  = Str_Get_Ptr8(term_text);
    const size_t      text_size = Str_Get_Size(term_text);

    // Decode doc id delta and frequency.
    const uint32_t doc_code  = InStream_Read_C32(instream);
    const uint32_t delta_doc = doc_code >> 1;
    const int32_t  doc_id    = last_doc_id + delta_doc;
    uint32_t freq;
    if (doc_code & 1) {
        freq = 1;
    }
    else {
        freq = InStream_Read_C32(instream);
    }

    // Grab enough pool memory for the posting.
    const size_t base_size = VTable_Get_Obj_Alloc_Size(RAWPOSTING);
    size_t needed = MAX_RAW_POSTING_LEN(base_size, text_size);
    void *const allocation = MemPool_Grab(mem_pool, needed);

    return RawPost_new(allocation, doc_id, freq, text_buf, text_size);
}
/* Write `value` to `outstream` as a delta against the stepper's current
 * value: the number of leading bytes shared with the previous term, followed
 * by the bytes that differ.
 *
 * `value` must be a String or a CharBuf; anything else throws.  Afterwards
 * the stepper's CharBuf holds the new text and the cached `string` is
 * released and cleared.
 */
void
TextTermStepper_Write_Delta_IMP(TextTermStepper *self, OutStream *outstream,
                                Obj *value) {
    TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
    CharBuf *prev_buf = (CharBuf*)ivars->value;
    const char *prev_text = CB_Get_Ptr8(prev_buf);
    size_t      prev_size = CB_Get_Size(prev_buf);

    // Extract the raw bytes of the incoming term.
    const char *next_text = NULL;
    size_t      next_size = 0;
    if (Obj_is_a(value, STRING)) {
        String *as_string = (String*)value;
        next_text = Str_Get_Ptr8(as_string);
        next_size = Str_Get_Size(as_string);
    }
    else if (Obj_is_a(value, CHARBUF)) {
        CharBuf *as_charbuf = (CharBuf*)value;
        next_text = CB_Get_Ptr8(as_charbuf);
        next_size = CB_Get_Size(as_charbuf);
    }
    else {
        THROW(ERR, "'value' must be a String or CharBuf");
    }

    // Count how many bytes the strings share at the top.
    const int32_t overlap
        = StrHelp_overlap(prev_text, next_text, prev_size, next_size);
    const char *const suffix     = next_text + overlap;
    const size_t      suffix_len = next_size - overlap;

    // Write number of common bytes, then the differing bytes.
    OutStream_Write_C32(outstream, overlap);
    OutStream_Write_String(outstream, suffix, suffix_len);

    // Update value.
    CB_Mimic_Utf8(prev_buf, next_text, next_size);

    // Invalidate string.
    DECREF(ivars->string);
    ivars->string = NULL;
}
/* After consolidating a folder into a compound file, verify that every
 * "offset" recorded in the compound file metadata is a multiple of 8.
 *
 * Emits one test for "files" being non-empty, then a single PASS or FAIL for
 * the offsets as a whole (checking stops at the first bad offset).
 */
static void
test_offsets(TestBatchRunner *runner) {
    Folder *folder = S_folder_with_contents();
    CompoundFileWriter *cf_writer = CFWriter_new(folder);
    Hash *cf_metadata;
    Hash *files;

    CFWriter_Consolidate(cf_writer);

    cf_metadata = (Hash*)CERTIFY(
                      Json_slurp_json(folder, cfmeta_file), HASH);
    files = (Hash*)CERTIFY(
                Hash_Fetch_Utf8(cf_metadata, "files", 5), HASH);

    bool offsets_ok = true;

    TEST_TRUE(runner, Hash_Get_Size(files) > 0, "Multiple files");

    HashIterator *iter = HashIter_new(files);
    while (HashIter_Next(iter)) {
        String *file = HashIter_Get_Key(iter);
        Hash *stats = (Hash*)CERTIFY(HashIter_Get_Value(iter), HASH);
        Obj *offset = CERTIFY(Hash_Fetch_Utf8(stats, "offset", 6), OBJ);
        int64_t offs = Obj_To_I64(offset);
        if (offs % 8 != 0) {
            offsets_ok = false;
            // Report the numeric value: PRId64 requires an int64_t argument,
            // not the Obj pointer it was extracted from.
            FAIL(runner, "Offset %" PRId64 " for %s not a multiple of 8",
                 offs, Str_Get_Ptr8(file));
            break;
        }
    }
    DECREF(iter);

    if (offsets_ok) {
        PASS(runner, "All offsets are multiples of 8");
    }

    DECREF(cf_metadata);
    DECREF(cf_writer);
    DECREF(folder);
}
/* Read one posting from `instream` into a RawPosting allocated from
 * `mem_pool`, and return it.
 *
 * The first C32 packs the doc id delta in its upper bits; the low bit, when
 * set, means the frequency is 1, otherwise the frequency follows as another
 * C32.  After the term text, the posting's blob receives one field-boost
 * byte followed by `freq` raw C64-encoded positions.  The allocation is made
 * at its maximum possible size and shrunk to the bytes actually used.
 * `self` is unused.
 */
RawPosting*
ScorePost_Read_Raw_IMP(ScorePosting *self, InStream *instream,
                       int32_t last_doc_id, String *term_text,
                       MemoryPool *mem_pool) {
    UNUSED_VAR(self);

    const char *const text_buf  = Str_Get_Ptr8(term_text);
    const size_t      text_size = Str_Get_Size(term_text);

    // Decode doc id delta and frequency.
    const uint32_t doc_code  = InStream_Read_C32(instream);
    const uint32_t delta_doc = doc_code >> 1;
    const int32_t  doc_id    = last_doc_id + delta_doc;
    uint32_t freq;
    if (doc_code & 1) {
        freq = 1;
    }
    else {
        freq = InStream_Read_C32(instream);
    }

    // Allocate for the worst case and construct the posting in place.
    const size_t base_size = Class_Get_Obj_Alloc_Size(RAWPOSTING);
    size_t alloc_bytes = MAX_RAW_POSTING_LEN(base_size, text_size, freq);
    void *const allocation = MemPool_Grab(mem_pool, alloc_bytes);
    RawPosting *const raw_posting
        = RawPost_new(allocation, doc_id, freq, text_buf, text_size);
    RawPostingIVARS *const raw_post_ivars = RawPost_IVARS(raw_posting);

    // Auxiliary data is appended directly after the term text.
    char *const aux_start = raw_post_ivars->blob + text_size;
    char *cursor = aux_start;

    // Field_boost.
    *((uint8_t*)cursor) = InStream_Read_U8(instream);
    cursor++;

    // Read positions.
    for (uint32_t remaining = freq; remaining > 0; remaining--) {
        cursor += InStream_Read_Raw_C64(instream, cursor);
    }

    // Resize raw posting memory allocation.
    raw_post_ivars->aux_len = cursor - aux_start;
    alloc_bytes = cursor - (char*)raw_posting;
    MemPool_Resize(mem_pool, raw_posting, alloc_bytes);

    return raw_posting;
}
/* Create a subclass of `parent` called `name` that adds no methods of its
 * own.
 *
 * Throws if `parent` is flagged final.  The new Class copies the parent's
 * flags, allocation sizes and vtable, and gets an empty (NULL-terminated)
 * method array.
 */
static Class*
S_simple_subclass(Class *parent, String *name) {
    if (parent->flags & CFISH_fFINAL) {
        THROW(ERR, "Can't subclass final class %o", Class_Get_Name(parent));
    }

    // Same footprint as the parent, since no novel methods are added.
    Class *child
        = (Class*)Memory_wrapped_calloc(parent->class_alloc_size, 1);
    Class_Init_Obj(parent->klass, child);

    child->parent           = parent;
    child->flags            = parent->flags;
    child->obj_alloc_size   = parent->obj_alloc_size;
    child->class_alloc_size = parent->class_alloc_size;
    child->methods          = (Method**)CALLOCATE(1, sizeof(Method*));

    const char *name_utf8 = Str_Get_Ptr8(name);
    size_t      name_size = Str_Get_Size(name);
    S_set_name(child, name_utf8, name_size);

    // Inherit every method pointer.
    size_t vtable_bytes = parent->class_alloc_size - offsetof(Class, vtable);
    memcpy(child->vtable, parent->vtable, vtable_bytes);

    return child;
}
static void test_Raw_Excerpt(TestBatchRunner *runner, Searcher *searcher, Obj *query) { String *content = (String*)SSTR_WRAP_UTF8("content", 7); Highlighter *highlighter = Highlighter_new(searcher, query, content, 6); int32_t top; String *raw_excerpt; String *field_val = (String *)SSTR_WRAP_UTF8("Ook. Urk. Ick. ", 18); Vector *spans = Vec_new(1); Vec_Push(spans, (Obj*)Span_new(0, 18, 1.0f)); HeatMap *heat_map = HeatMap_new(spans, 133); DECREF(spans); raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top, heat_map); TEST_TRUE(runner, Str_Equals_Utf8(raw_excerpt, "Ook.", 4), "Raw_Excerpt at top %s", Str_Get_Ptr8(raw_excerpt)); TEST_TRUE(runner, top == 0, "top is 0"); DECREF(raw_excerpt); DECREF(heat_map); spans = Vec_new(1); Vec_Push(spans, (Obj*)Span_new(6, 12, 1.0f)); heat_map = HeatMap_new(spans, 133); DECREF(spans); raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top, heat_map); TEST_TRUE(runner, Str_Equals_Utf8(raw_excerpt, "Urk.", 4), "Raw_Excerpt in middle, with 2 bounds"); TEST_TRUE(runner, top == 6, "top in the middle modified by Raw_Excerpt"); DECREF(raw_excerpt); DECREF(heat_map); field_val = (String *)SSTR_WRAP_UTF8("Ook urk ick i.", 14); spans = Vec_new(1); Vec_Push(spans, (Obj*)Span_new(12, 1, 1.0f)); heat_map = HeatMap_new(spans, 133); DECREF(spans); raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top, heat_map); TEST_TRUE(runner, Str_Equals_Utf8(raw_excerpt, ELLIPSIS " i.", 6), "Ellipsis at top"); TEST_TRUE(runner, top == 10, "top correct when leading ellipsis inserted"); DECREF(heat_map); DECREF(raw_excerpt); field_val = (String *)SSTR_WRAP_UTF8("Urk. 
Iz no good.", 17); spans = Vec_new(1); Vec_Push(spans, (Obj*)Span_new(6, 2, 1.0f)); heat_map = HeatMap_new(spans, 133); DECREF(spans); raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top, heat_map); TEST_TRUE(runner, Str_Equals_Utf8(raw_excerpt, "Iz no" ELLIPSIS, 8), "Ellipsis at end"); TEST_TRUE(runner, top == 6, "top trimmed"); DECREF(heat_map); DECREF(raw_excerpt); // Words longer than excerpt len field_val = (String *)SSTR_WRAP_UTF8("abc/def/ghi/jkl/mno", 19); spans = Vec_new(1); Vec_Push(spans, (Obj*)Span_new(0, 3, 1.0f)); heat_map = HeatMap_new(spans, 133); DECREF(spans); raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top, heat_map); TEST_TRUE(runner, Str_Equals_Utf8(raw_excerpt, "abc/d" ELLIPSIS, 8), "Long word at top"); DECREF(heat_map); DECREF(raw_excerpt); spans = Vec_new(1); Vec_Push(spans, (Obj*)Span_new(8, 3, 1.0f)); heat_map = HeatMap_new(spans, 133); DECREF(spans); raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top, heat_map); TEST_TRUE(runner, Str_Equals_Utf8(raw_excerpt, ELLIPSIS " f/g" ELLIPSIS, 10), "Long word in middle"); DECREF(heat_map); DECREF(raw_excerpt); DECREF(highlighter); }
void Class_bootstrap(const cfish_ParcelSpec *parcel_spec) { const ClassSpec *specs = parcel_spec->class_specs; const NovelMethSpec *novel_specs = parcel_spec->novel_specs; const OverriddenMethSpec *overridden_specs = parcel_spec->overridden_specs; const InheritedMethSpec *inherited_specs = parcel_spec->inherited_specs; uint32_t num_classes = parcel_spec->num_classes; /* Pass 1: * - Allocate memory. * - Initialize global Class pointers. */ for (uint32_t i = 0; i < num_classes; ++i) { const ClassSpec *spec = &specs[i]; Class *parent = NULL; if (spec->parent) { parent = *spec->parent; if (!parent) { // Wrong order of class specs or inheritance cycle. fprintf(stderr, "Parent class of '%s' not initialized\n", spec->name); abort(); } } uint32_t novel_offset = parent ? parent->class_alloc_size : offsetof(Class, vtable); uint32_t class_alloc_size = novel_offset + spec->num_novel_meths * sizeof(cfish_method_t); Class *klass = (Class*)CALLOCATE(class_alloc_size, 1); // Needed to calculate size of subclasses. klass->class_alloc_size = class_alloc_size; // Initialize the global pointer to the Class. if (!Atomic_cas_ptr((void**)spec->klass, NULL, klass)) { // Another thread beat us to it. FREEMEM(klass); } } /* Pass 2: * - Initialize IVARS_OFFSET. * - Initialize 'klass' ivar and refcount by calling Init_Obj. * - Initialize parent, flags, obj_alloc_size, class_alloc_size. * - Assign parcel_spec. * - Initialize method pointers and offsets. */ uint32_t num_novel = 0; uint32_t num_overridden = 0; uint32_t num_inherited = 0; for (uint32_t i = 0; i < num_classes; ++i) { const ClassSpec *spec = &specs[i]; Class *klass = *spec->klass; Class *parent = spec->parent ? *spec->parent : NULL; uint32_t ivars_offset = 0; if (spec->ivars_offset_ptr != NULL) { if (parent) { Class *ancestor = parent; while (ancestor && ancestor->parcel_spec == parcel_spec) { ancestor = ancestor->parent; } ivars_offset = ancestor ? 
ancestor->obj_alloc_size : 0; *spec->ivars_offset_ptr = ivars_offset; } else { *spec->ivars_offset_ptr = 0; } } // CLASS->obj_alloc_size is always 0, so Init_Obj doesn't clear any // values set in the previous pass or by another thread. Class_Init_Obj_IMP(CLASS, klass); klass->parent = parent; klass->parcel_spec = parcel_spec; // CLASS->obj_alloc_size must stay at 0. if (klass != CLASS) { klass->obj_alloc_size = ivars_offset + spec->ivars_size; } if (cfish_Class_bootstrap_hook1 != NULL) { cfish_Class_bootstrap_hook1(klass); } klass->flags = 0; if (klass == CLASS || klass == METHOD || klass == BOOLEAN || klass == STRING ) { klass->flags |= CFISH_fREFCOUNTSPECIAL; } if (spec->flags & cfish_ClassSpec_FINAL) { klass->flags |= CFISH_fFINAL; } if (parent) { // Copy parent vtable. uint32_t parent_vt_size = parent->class_alloc_size - offsetof(Class, vtable); memcpy(klass->vtable, parent->vtable, parent_vt_size); } for (size_t i = 0; i < spec->num_inherited_meths; ++i) { const InheritedMethSpec *mspec = &inherited_specs[num_inherited++]; *mspec->offset = *mspec->parent_offset; } for (size_t i = 0; i < spec->num_overridden_meths; ++i) { const OverriddenMethSpec *mspec = &overridden_specs[num_overridden++]; *mspec->offset = *mspec->parent_offset; Class_Override_IMP(klass, mspec->func, *mspec->offset); } uint32_t novel_offset = parent ? parent->class_alloc_size : offsetof(Class, vtable); for (size_t i = 0; i < spec->num_novel_meths; ++i) { const NovelMethSpec *mspec = &novel_specs[num_novel++]; *mspec->offset = novel_offset; novel_offset += sizeof(cfish_method_t); Class_Override_IMP(klass, mspec->func, *mspec->offset); } } /* Now it's safe to call methods. * * Pass 3: * - Inititalize name and method array. * - Register class. 
*/ num_novel = 0; num_overridden = 0; num_inherited = 0; for (uint32_t i = 0; i < num_classes; ++i) { const ClassSpec *spec = &specs[i]; Class *klass = *spec->klass; String *name_internal = Str_new_from_trusted_utf8(spec->name, strlen(spec->name)); if (!Atomic_cas_ptr((void**)&klass->name_internal, NULL, name_internal) ) { DECREF(name_internal); name_internal = klass->name_internal; } String *name = Str_new_wrap_trusted_utf8(Str_Get_Ptr8(name_internal), Str_Get_Size(name_internal)); if (!Atomic_cas_ptr((void**)&klass->name, NULL, name)) { DECREF(name); name = klass->name; } Method **methods = (Method**)MALLOCATE((spec->num_novel_meths + 1) * sizeof(Method*)); // Only store novel methods for now. for (size_t i = 0; i < spec->num_novel_meths; ++i) { const NovelMethSpec *mspec = &novel_specs[num_novel++]; String *name = SSTR_WRAP_C(mspec->name); Method *method = Method_new(name, mspec->callback_func, *mspec->offset); methods[i] = method; } methods[spec->num_novel_meths] = NULL; if (!Atomic_cas_ptr((void**)&klass->methods, NULL, methods)) { // Another thread beat us to it. for (size_t i = 0; i < spec->num_novel_meths; ++i) { Method_Destroy(methods[i]); } FREEMEM(methods); } Class_add_to_registry(klass); } }
/* Exercise FSFH_open() flag handling on a scratch file "_fstest":
 * opening a missing file read-only, missing FH_CREATE, missing
 * FH_WRITE_ONLY, exclusive creation, FH_EXCLUSIVE on an existing file,
 * opening for append, and opening an existing file read-only.
 *
 * Each step clears the pending error first so that the "sets error" /
 * "no errors" checks only reflect that step.
 */
static void
test_open(TestBatchRunner *runner) {
    String *test_filename = (String*)SSTR_WRAP_UTF8("_fstest", 7);
    FSFileHandle *handle;

    remove(Str_Get_Ptr8(test_filename));

    // Read-only open of a file that does not exist.
    Err_set_error(NULL);
    handle = FSFH_open(test_filename, FH_READ_ONLY);
    TEST_TRUE(runner, handle == NULL,
              "open() with FH_READ_ONLY on non-existent file returns NULL");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "open() with FH_READ_ONLY on non-existent file sets error");

    // Write-only without FH_CREATE.
    Err_set_error(NULL);
    handle = FSFH_open(test_filename, FH_WRITE_ONLY);
    TEST_TRUE(runner, handle == NULL,
              "open() without FH_CREATE returns NULL");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "open() without FH_CREATE sets error");

    // FH_CREATE without FH_WRITE_ONLY.
    Err_set_error(NULL);
    handle = FSFH_open(test_filename, FH_CREATE);
    TEST_TRUE(runner, handle == NULL,
              "open() without FH_WRITE_ONLY returns NULL");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "open() without FH_WRITE_ONLY sets error");

    // Exclusive creation succeeds while the file does not exist yet.
    Err_set_error(NULL);
    handle = FSFH_open(test_filename,
                       FH_CREATE | FH_WRITE_ONLY | FH_EXCLUSIVE);
    TEST_TRUE(runner, handle && FSFH_Is_A(handle, FSFILEHANDLE),
              "open() succeeds");
    TEST_TRUE(runner, Err_get_error() == NULL, "open() no errors");
    FSFH_Write(handle, "foo", 3);
    if (!FSFH_Close(handle)) {
        RETHROW(INCREF(Err_get_error()));
    }
    DECREF(handle);

    // A second exclusive creation must be refused.
    Err_set_error(NULL);
    handle = FSFH_open(test_filename,
                       FH_CREATE | FH_WRITE_ONLY | FH_EXCLUSIVE);
    TEST_TRUE(runner, handle == NULL, "FH_EXCLUSIVE blocks open()");
    TEST_TRUE(runner, Err_get_error() != NULL,
              "FH_EXCLUSIVE blocks open(), sets error");

    // Without FH_EXCLUSIVE the existing file can be opened for writing.
    Err_set_error(NULL);
    handle = FSFH_open(test_filename, FH_CREATE | FH_WRITE_ONLY);
    TEST_TRUE(runner, handle && FSFH_Is_A(handle, FSFILEHANDLE),
              "open() for append");
    TEST_TRUE(runner, Err_get_error() == NULL,
              "open() for append -- no errors");
    FSFH_Write(handle, "bar", 3);
    if (!FSFH_Close(handle)) {
        RETHROW(INCREF(Err_get_error()));
    }
    DECREF(handle);

    // Read-only open of the now-existing file.
    Err_set_error(NULL);
    handle = FSFH_open(test_filename, FH_READ_ONLY);
    TEST_TRUE(runner, handle && FSFH_Is_A(handle, FSFILEHANDLE),
              "open() read only");
    TEST_TRUE(runner, Err_get_error() == NULL,
              "open() read only -- no errors");

    DECREF(handle);
    remove(Str_Get_Ptr8(test_filename));
}
/* Run the query parser syntax test cases against a freshly built index.
 *
 * Each entry of `leaf_test_funcs` yields three tests (tree, expand_leaf,
 * hit count); each entry of `syntax_test_funcs` yields two (tree, hit
 * count).  When RegexTokenizer is unavailable, the same number of tests is
 * skipped instead.
 */
static void
test_query_parser_syntax(TestBatchRunner *runner) {
    if (!RegexTokenizer_is_available()) {
        for (uint32_t n = 0; leaf_test_funcs[n] != NULL; n++) {
            SKIP(runner, 3, "RegexTokenizer not available");
        }
        for (uint32_t n = 0; syntax_test_funcs[n] != NULL; n++) {
            SKIP(runner, 2, "RegexTokenizer not available");
        }
        return;
    }

    Folder        *index    = build_index();
    IndexSearcher *searcher = IxSearcher_new((Obj*)index);
    QueryParser   *qparser  = QParser_new(IxSearcher_Get_Schema(searcher),
                                          NULL, NULL, NULL);
    QParser_Set_Heed_Colons(qparser, true);

    // Leaf tests: tree, expanded leaf and hit count.
    for (uint32_t n = 0; leaf_test_funcs[n] != NULL; n++) {
        LUCY_TestQPSyntax_Test_t make_case = leaf_test_funcs[n];
        TestQueryParser *test_case = make_case();
        TestQueryParserIVARS *ivars = TestQP_IVARS(test_case);
        Query *tree     = QParser_Tree(qparser, ivars->query_string);
        Query *expanded = QParser_Expand_Leaf(qparser, ivars->tree);
        Query *parsed   = QParser_Parse(qparser, ivars->query_string);
        Hits  *hits     = IxSearcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL);

        TEST_TRUE(runner, Query_Equals(tree, (Obj*)ivars->tree),
                  "tree()    %s", Str_Get_Ptr8(ivars->query_string));
        TEST_TRUE(runner, Query_Equals(expanded, (Obj*)ivars->expanded),
                  "expand_leaf()    %s", Str_Get_Ptr8(ivars->query_string));
        TEST_INT_EQ(runner, Hits_Total_Hits(hits), ivars->num_hits,
                    "hits:    %s", Str_Get_Ptr8(ivars->query_string));

        DECREF(hits);
        DECREF(parsed);
        DECREF(expanded);
        DECREF(tree);
        DECREF(test_case);
    }

    // Syntax tests: tree and hit count only.
    for (uint32_t n = 0; syntax_test_funcs[n] != NULL; n++) {
        LUCY_TestQPSyntax_Test_t make_case = syntax_test_funcs[n];
        TestQueryParser *test_case = make_case();
        TestQueryParserIVARS *ivars = TestQP_IVARS(test_case);
        Query *tree   = QParser_Tree(qparser, ivars->query_string);
        Query *parsed = QParser_Parse(qparser, ivars->query_string);
        Hits  *hits   = IxSearcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL);

        TEST_TRUE(runner, Query_Equals(tree, (Obj*)ivars->tree),
                  "tree()    %s", Str_Get_Ptr8(ivars->query_string));
        TEST_INT_EQ(runner, Hits_Total_Hits(hits), ivars->num_hits,
                    "hits:    %s", Str_Get_Ptr8(ivars->query_string));

        DECREF(hits);
        DECREF(parsed);
        DECREF(tree);
        DECREF(test_case);
    }

    DECREF(searcher);
    DECREF(qparser);
    DECREF(index);
}
// Append the stored fields of one inverted document to the data stream and
// record where that document starts in the index stream.
//
// The record written to the data stream is: the count of stored fields
// (Write_C32), then for each stored field its name (via
// Freezer_serialize_string) followed by its value, encoded according to the
// field type's primitive id.  Finally the data-stream position at which the
// record began is written to the index stream as an I64.
//
// Throws if `doc_id` does not equal the index stream position divided by 8,
// or if a stored field has an unrecognized primitive type.
void
DocWriter_Add_Inverted_Doc_IMP(DocWriter *self, Inverter *inverter,
                               int32_t doc_id) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);

    // NOTE(review): ix_out is read from ivars only after S_lazy_init() has
    // run; presumably S_lazy_init() is what opens the streams, so keep this
    // order.
    OutStream *data_stream  = S_lazy_init(self);
    OutStream *index_stream = ivars->ix_out;

    const int64_t record_start = OutStream_Tell(data_stream);
    const int64_t next_doc_id  = OutStream_Tell(index_stream) / 8;

    // Verify doc id.
    if (doc_id != next_doc_id) {
        THROW(ERR, "Expected doc id %i64 but got %i32", next_doc_id, doc_id);
    }

    // First pass: count the stored fields so the count can lead the record.
    uint32_t stored_count = 0;
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        if (FType_Stored(Inverter_Get_Type(inverter))) {
            stored_count++;
        }
    }
    OutStream_Write_C32(data_stream, stored_count);

    // Second pass: write name and value of every stored field.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);

        // Only store fields marked as "stored".
        if (!FType_Stored(type)) {
            continue;
        }

        String *field_name = Inverter_Get_Field_Name(inverter);
        Obj    *field_val  = Inverter_Get_Value(inverter);
        Freezer_serialize_string(field_name, data_stream);

        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                String     *text  = (String*)field_val;
                const char *bytes = Str_Get_Ptr8(text);
                size_t      len   = Str_Get_Size(text);
                // Length prefix, then the raw bytes.
                OutStream_Write_C32(data_stream, len);
                OutStream_Write_Bytes(data_stream, bytes, len);
                break;
            }
            case FType_BLOB: {
                ByteBuf *blob  = (ByteBuf*)field_val;
                char    *bytes = BB_Get_Buf(blob);
                size_t   len   = BB_Get_Size(blob);
                // Length prefix, then the raw bytes.
                OutStream_Write_C32(data_stream, len);
                OutStream_Write_Bytes(data_stream, bytes, len);
                break;
            }
            case FType_INT32: {
                int32_t number = Int32_Get_Value((Integer32*)field_val);
                OutStream_Write_C32(data_stream, number);
                break;
            }
            case FType_INT64: {
                int64_t number = Int64_Get_Value((Integer64*)field_val);
                OutStream_Write_C64(data_stream, number);
                break;
            }
            case FType_FLOAT32: {
                float number = Float32_Get_Value((Float32*)field_val);
                OutStream_Write_F32(data_stream, number);
                break;
            }
            case FType_FLOAT64: {
                double number = Float64_Get_Value((Float64*)field_val);
                OutStream_Write_F64(data_stream, number);
                break;
            }
            default:
                THROW(ERR, "Unrecognized type: %o", type);
        }
    }

    // Write file pointer.
    OutStream_Write_I64(index_stream, record_start);
}
static void test_tokenizer(TestBatchRunner *runner) { StandardTokenizer *tokenizer = StandardTokenizer_new(); String *word = SSTR_WRAP_C( " ." "tha\xCC\x82t's" ":" "1,02\xC2\xADZ4.38" "\xE0\xB8\x81\xC2\xAD\xC2\xAD" "\xF0\xA0\x80\x80" "a" "/"); Vector *got = StandardTokenizer_Split(tokenizer, word); String *token = (String*)Vec_Fetch(got, 0); TEST_TRUE(runner, token && Str_is_a(token, STRING) && Str_Equals_Utf8(token, "tha\xcc\x82t's", 8), "Token: %s", Str_Get_Ptr8(token)); token = (String*)Vec_Fetch(got, 1); TEST_TRUE(runner, token && Str_is_a(token, STRING) && Str_Equals_Utf8(token, "1,02\xC2\xADZ4.38", 11), "Token: %s", Str_Get_Ptr8(token)); token = (String*)Vec_Fetch(got, 2); TEST_TRUE(runner, token && Str_is_a(token, STRING) && Str_Equals_Utf8(token, "\xE0\xB8\x81\xC2\xAD\xC2\xAD", 7), "Token: %s", Str_Get_Ptr8(token)); token = (String*)Vec_Fetch(got, 3); TEST_TRUE(runner, token && Str_is_a(token, STRING) && Str_Equals_Utf8(token, "\xF0\xA0\x80\x80", 4), "Token: %s", Str_Get_Ptr8(token)); token = (String*)Vec_Fetch(got, 4); TEST_TRUE(runner, token && Str_is_a(token, STRING) && Str_Equals_Utf8(token, "a", 1), "Token: %s", Str_Get_Ptr8(token)); DECREF(got); FSFolder *modules_folder = TestUtils_modules_folder(); if (modules_folder == NULL) { SKIP(runner, 1372, "Can't locate test data"); } else { String *path = Str_newf("unicode/ucd/WordBreakTest.json"); Vector *tests = (Vector*)Json_slurp_json((Folder*)modules_folder, path); if (!tests) { RETHROW(Err_get_error()); } for (uint32_t i = 0, max = Vec_Get_Size(tests); i < max; i++) { Hash *test = (Hash*)Vec_Fetch(tests, i); String *text = (String*)Hash_Fetch_Utf8(test, "text", 4); Vector *wanted = (Vector*)Hash_Fetch_Utf8(test, "words", 5); Vector *got = StandardTokenizer_Split(tokenizer, text); TEST_TRUE(runner, Vec_Equals(wanted, (Obj*)got), "UCD test #%d", i + 1); DECREF(got); } DECREF(tests); DECREF(modules_folder); DECREF(path); } DECREF(tokenizer); }
// Write one value to the output streams according to its primitive type.
//
// TEXT / BLOB: the current position of `dat_out` relative to `dat_start` is
// written to `ix_out` as an I64; if `val` is non-NULL its bytes are then
// appended to `dat_out`.  A NULL `val` writes the offset only.
//
// INT32 / INT64 / FLOAT32 / FLOAT64: the value is written to `dat_out`
// directly and `ix_out` is not touched.  A NULL `val` is written as zero.
//
// Throws on any other primitive id.
static void
S_write_val(Obj *val, int8_t prim_id, OutStream *ix_out, OutStream *dat_out,
            int64_t dat_start) {
    switch (prim_id & FType_PRIMITIVE_ID_MASK) {
        case FType_TEXT: {
            OutStream_Write_I64(ix_out, OutStream_Tell(dat_out) - dat_start);
            if (val != NULL) {
                String *text = (String*)val;
                OutStream_Write_Bytes(dat_out, Str_Get_Ptr8(text),
                                      Str_Get_Size(text));
            }
            break;
        }
        case FType_BLOB: {
            OutStream_Write_I64(ix_out, OutStream_Tell(dat_out) - dat_start);
            if (val != NULL) {
                Blob *bytes = (Blob*)val;
                OutStream_Write_Bytes(dat_out, Blob_Get_Buf(bytes),
                                      Blob_Get_Size(bytes));
            }
            break;
        }
        case FType_INT32: {
            int32_t number = 0;
            if (val != NULL) {
                number = (int32_t)Int_Get_Value((Integer*)val);
            }
            OutStream_Write_I32(dat_out, number);
            break;
        }
        case FType_INT64: {
            int64_t number = 0;
            if (val != NULL) {
                number = Int_Get_Value((Integer*)val);
            }
            OutStream_Write_I64(dat_out, number);
            break;
        }
        case FType_FLOAT32: {
            float number = 0.0f;
            if (val != NULL) {
                number = (float)Float_Get_Value((Float*)val);
            }
            OutStream_Write_F32(dat_out, number);
            break;
        }
        case FType_FLOAT64: {
            double number = 0.0;
            if (val != NULL) {
                number = Float_Get_Value((Float*)val);
            }
            OutStream_Write_F64(dat_out, number);
            break;
        }
        default:
            THROW(ERR, "Unrecognized primitive id: %i32", (int32_t)prim_id);
    }
}