// Serialize this Normalizer into a Hash.  Starts from the parent class's
// dump and adds three entries derived from self->options:
//   "normalization_form" => "NFC" | "NFKC" | "NFD" | "NFKD"
//   "case_fold"          => boolean singleton for UTF8PROC_CASEFOLD
//   "strip_accents"      => boolean singleton for UTF8PROC_STRIPMARK
Hash*
Normalizer_dump(Normalizer *self) {
    Normalizer_Dump_t parent_dump
        = SUPER_METHOD_PTR(NORMALIZER, Lucy_Normalizer_Dump);
    Hash *dump    = parent_dump(self);
    int   options = self->options;

    // COMPOSE selects between the "C" and "D" forms; COMPAT adds the "K".
    CharBuf *form;
    if (options & UTF8PROC_COMPOSE) {
        if (options & UTF8PROC_COMPAT) {
            form = CB_new_from_trusted_utf8("NFKC", 4);
        }
        else {
            form = CB_new_from_trusted_utf8("NFC", 3);
        }
    }
    else {
        if (options & UTF8PROC_COMPAT) {
            form = CB_new_from_trusted_utf8("NFKD", 4);
        }
        else {
            form = CB_new_from_trusted_utf8("NFD", 3);
        }
    }
    Hash_Store_Str(dump, "normalization_form", 18, (Obj*)form);

    BoolNum *case_fold = Bool_singleton(options & UTF8PROC_CASEFOLD);
    Hash_Store_Str(dump, "case_fold", 9, (Obj*)case_fold);

    BoolNum *strip_accents = Bool_singleton(options & UTF8PROC_STRIPMARK);
    Hash_Store_Str(dump, "strip_accents", 13, (Obj*)strip_accents);

    return dump;
}
/* Create a CharBuf from `size` bytes at `ptr`, checking first that the
 * bytes are valid UTF-8.  Invalid input is reported through
 * DIE_INVALID_UTF8; valid input is handed to the trusting constructor. */
CharBuf*
CB_new_from_utf8(const char *ptr, size_t size) {
    if (!StrHelp_utf8_valid(ptr, size)) {
        DIE_INVALID_UTF8(ptr, size);
    }
    CharBuf *self = CB_new_from_trusted_utf8(ptr, size);
    return self;
}
// Verify that a Hash survives a Dump => Load round trip: store one
// "foo" => "foo" pair, dump it, load the dump, and compare with the
// original Hash.
static void
test_Dump_and_Load(TestBatch *batch) {
    Hash *original = Hash_new(0);
    Hash_Store_Str(original, "foo", 3,
                   (Obj*)CB_new_from_trusted_utf8("foo", 3));

    Obj  *dumped    = (Obj*)Hash_Dump(original);
    Hash *reloaded  = (Hash*)Obj_Load(dumped, dumped);
    TEST_TRUE(batch, Hash_Equals(original, (Obj*)reloaded),
              "Dump => Load round trip");
    DECREF(dumped);
    DECREF(reloaded);

    /* TODO: Fix Hash_Load().

    Hash_Store_Str(hash, "_class", 6,
                   (Obj*)CB_new_from_trusted_utf8("not_a_class", 11));
    dump = (Obj*)Hash_Dump(hash);
    loaded = (Hash*)Obj_Load(dump, dump);

    TEST_TRUE(batch, Hash_Equals(hash, (Obj*)loaded),
              "Load still works with _class if it's not a real class");
    DECREF(dump);
    DECREF(loaded);

    */

    DECREF(original);
}
// Render the query as "field:[lower TO upper]".  An inclusive bound is
// shown with a square bracket and an exclusive one with a curly brace;
// a missing (NULL) bound term is shown as "*".
CharBuf*
RangeQuery_to_string(RangeQuery *self) {
    CharBuf *lower_str;
    if (self->lower_term) {
        lower_str = Obj_To_String(self->lower_term);
    }
    else {
        lower_str = CB_new_from_trusted_utf8("*", 1);
    }

    CharBuf *upper_str;
    if (self->upper_term) {
        upper_str = Obj_To_String(self->upper_term);
    }
    else {
        upper_str = CB_new_from_trusted_utf8("*", 1);
    }

    const char *open_bracket  = self->include_lower ? "[" : "{";
    const char *close_bracket = self->include_upper ? "]" : "}";
    CharBuf *result = CB_newf("%o:%s%o TO %o%s", self->field, open_bracket,
                              lower_str, upper_str, close_bracket);

    // The temporary bound strings are no longer needed once formatted.
    DECREF(upper_str);
    DECREF(lower_str);
    return result;
}
/* Produce "compiler(<parent query as string>)". */
CharBuf*
Compiler_to_string(Compiler *self) {
    CharBuf *parent_str = Query_To_String(self->parent);
    CharBuf *result     = CB_new_from_trusted_utf8("compiler(", 9);

    CB_Cat(result, parent_str);
    CB_Cat_Trusted_Str(result, ")", 1);

    /* The parent's string was only needed for concatenation. */
    DECREF(parent_str);
    return result;
}
/* Build a wanted/got pair for the "%f64" conversion of CB_catf and push it
 * onto `tests`.  The expected text is produced with sprintf's "%g". */
static void
vcatf_f64(VArray *tests) {
    char     expected_text[64];
    float    value = 1.3f;
    CharBuf *got   = S_get_cb("foo ");
    CharBuf *wanted;

    sprintf(expected_text, "foo bar %g baz", value);
    wanted = CB_new_from_trusted_utf8(expected_text, strlen(expected_text));

    CB_catf(got, "bar %f64 baz", value);
    VA_Push(tests, (Obj*)TestCB_new(wanted, got));
}
// Check that CB_catf's "%f64" conversion appends the same text that
// sprintf's "%g" produces for the same value.
static void
test_vcatf_f64(TestBatch *batch) {
    char     expected_text[64];
    float    value = 1.3f;
    CharBuf *got   = S_get_cb("foo ");
    CharBuf *wanted;

    sprintf(expected_text, "foo bar %g baz", value);
    wanted = CB_new_from_trusted_utf8(expected_text, strlen(expected_text));

    CB_catf(got, "bar %f64 baz", value);
    TEST_TRUE(batch, CB_Equals(wanted, (Obj*)got), "%%f64");

    DECREF(wanted);
    DECREF(got);
}
/* Build a wanted/got pair for the "%x32" conversion of CB_catf and push it
 * onto `tests`.
 *
 * The expected text is formatted with "%.8lx".  `num` is an unsigned long,
 * so the "l" length modifier matches its type whatever the width of long,
 * and the ".8" precision pads to at least eight hex digits.  This replaces
 * an #if on SIZEOF_LONG / SIZEOF_INT that emitted no sprintf at all (and
 * therefore left `buf` uninitialized before strlen) when neither type was
 * four bytes wide. */
static void
vcatf_x32(VArray *tests) {
    CharBuf *wanted;
    char buf[64];
    unsigned long num = I32_MAX;
    CharBuf *got = S_get_cb("foo ");

    sprintf(buf, "foo bar %.8lx baz", num);
    wanted = CB_new_from_trusted_utf8(buf, strlen(buf));

    CB_catf(got, "bar %x32 baz", (u32_t)num);
    VA_Push(tests, (Obj*)TestCB_new(wanted, got));
}
/* Convert `num` to its base 36 representation and return it as a new
 * CharBuf.  Digits are generated least-significant first, so the scratch
 * buffer is filled from its end towards its start. */
CharBuf*
StrHelp_to_base36(u32_t num) {
    char  scratch[11];
    char *cursor = scratch + 10;

    /* Terminate first; digits are written in front of the NUL. */
    *cursor = '\0';

    /* A do/while guarantees at least one digit, so 0 yields one char. */
    do {
        cursor--;
        *cursor = base36_chars[num % 36];
        num /= 36;
    } while (num > 0);

    return CB_new_from_trusted_utf8(cursor, strlen(cursor));
}
// Check that CB_catf's "%x32" conversion appends the value as eight hex
// digits.
//
// The expected text is formatted with "%.8lx".  `num` is an unsigned long,
// so the "l" length modifier matches its type whatever the width of long,
// and the ".8" precision pads to at least eight hex digits.  This replaces
// an #if on SIZEOF_LONG / SIZEOF_INT that emitted no sprintf at all (and
// therefore left `buf` uninitialized before strlen) when neither type was
// four bytes wide.
static void
test_vcatf_x32(TestBatch *batch) {
    CharBuf *wanted;
    char buf[64];
    unsigned long num = I32_MAX;
    CharBuf *got = S_get_cb("foo ");

    sprintf(buf, "foo bar %.8lx baz", num);
    wanted = CB_new_from_trusted_utf8(buf, strlen(buf));

    CB_catf(got, "bar %x32 baz", (uint32_t)num);
    TEST_TRUE(batch, CB_Equals(wanted, (Obj*)got), "%%x32");

    DECREF(wanted);
    DECREF(got);
}
// Initialize an IndexManager.
//
// `host` is cloned when supplied; when it is NULL an empty string is used
// instead.  `lock_factory` is stored after passing it through INCREF.  The
// folder starts out unset and the lock timeouts/intervals receive their
// default values.  Returns `self`.
IndexManager*
IxManager_init(IndexManager *self, const CharBuf *host,
               LockFactory *lock_factory) {
    if (host) {
        self->host = CB_Clone(host);
    }
    else {
        self->host = CB_new_from_trusted_utf8("", 0);
    }
    self->lock_factory = (LockFactory*)INCREF(lock_factory);
    self->folder       = NULL;

    // Write lock defaults.
    self->write_lock_timeout  = 1000;
    self->write_lock_interval = 100;

    // Merge lock defaults.
    self->merge_lock_timeout  = 0;
    self->merge_lock_interval = 1000;

    // Deletion lock defaults.
    self->deletion_lock_timeout  = 1000;
    self->deletion_lock_interval = 100;

    return self;
}
static CharBuf* S_parse_string(char **json_ptr, char *const limit) { // Find terminating double quote, determine whether there are any escapes. char *top = *json_ptr + 1; char *end = NULL; bool_t saw_backslash = false; for (char *text = top; text < limit; text++) { if (*text == '"') { end = text; break; } else if (*text == '\\') { saw_backslash = true; if (text + 1 < limit && text[1] == 'u') { text += 5; } else { text += 1; } } } if (!end) { SET_ERROR(CB_newf("Unterminated string"), *json_ptr, limit); return NULL; } // Advance the text buffer to just beyond the closing quote. *json_ptr = end + 1; if (saw_backslash) { return S_unescape_text(top, end); } else { // Optimize common case where there are no escapes. size_t len = end - top; if (!StrHelp_utf8_valid(top, len)) { CharBuf *mess = MAKE_MESS("Bad UTF-8 in JSON"); Err_set_error(Err_new(mess)); return NULL; } return CB_new_from_trusted_utf8(top, len); } }
// Initialize a Folder.
//
// Creates the `entries` Hash and sets `path` to a copy of the supplied
// path with one trailing DIR_SEP removed, or to an empty string when
// `path` is NULL.  Finishes with the abstract-class check and returns
// `self`.
Folder*
Folder_init(Folder *self, const CharBuf *path) {
    self->entries = Hash_new(16);

    if (path != NULL) {
        // Work on a private copy so the caller's string is untouched.
        self->path = CB_Clone(path);
        if (CB_Ends_With_Str(self->path, DIR_SEP, strlen(DIR_SEP))) {
            // Strip trailing slash or equivalent.
            CB_Chop(self->path, 1);
        }
    }
    else {
        self->path = CB_new_from_trusted_utf8("", 0);
    }

    ABSTRACT_CLASS_CHECK(self, FOLDER);
    return self;
}
// Stringify an ORQuery as "(kid1 OR kid2 OR ...)".  A query without
// children is rendered as "()".
String*
ORQuery_To_String_IMP(ORQuery *self) {
    ORQueryIVARS *const ivars = ORQuery_IVARS(self);
    const uint32_t kid_count = Vec_Get_Size(ivars->children);

    if (kid_count == 0) {
        return Str_new_from_trusted_utf8("()", 2);
    }

    CharBuf *accumulator = CB_new_from_trusted_utf8("(", 1);
    for (uint32_t tick = 0; tick < kid_count; tick++) {
        String *child_str = Obj_To_String(Vec_Fetch(ivars->children, tick));
        CB_Cat(accumulator, child_str);
        DECREF(child_str);

        // Separator goes between children only, never after the last one.
        if (tick + 1 < kid_count) {
            CB_Cat_Trusted_Utf8(accumulator, " OR ", 4);
        }
    }
    CB_Cat_Trusted_Utf8(accumulator, ")", 1);

    String *result = CB_Yield_String(accumulator);
    DECREF(accumulator);
    return result;
}
void TestQPLogic_run_tests() { uint32_t i; TestBatch *batch = TestBatch_new(258); Folder *folder = S_create_index(); IndexSearcher *searcher = IxSearcher_new((Obj*)folder); QueryParser *or_parser = QParser_new(IxSearcher_Get_Schema(searcher), NULL, NULL, NULL); ZombieCharBuf *AND = ZCB_WRAP_STR("AND", 3); QueryParser *and_parser = QParser_new(IxSearcher_Get_Schema(searcher), NULL, (CharBuf*)AND, NULL); QParser_Set_Heed_Colons(or_parser, true); QParser_Set_Heed_Colons(and_parser, true); TestBatch_Plan(batch); // Run logical tests with default boolop of OR. for (i = 0; logical_test_funcs[i] != NULL; i++) { Lucy_TestQPLogic_Logical_Test_t test_func = logical_test_funcs[i]; TestQueryParser *test_case = test_func(BOOLOP_OR); Query *tree = QParser_Tree(or_parser, test_case->query_string); Query *parsed = QParser_Parse(or_parser, test_case->query_string); Hits *hits = IxSearcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL); TEST_TRUE(batch, Query_Equals(tree, (Obj*)test_case->tree), "tree() OR %s", (char*)CB_Get_Ptr8(test_case->query_string)); TEST_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits, "hits: OR %s", (char*)CB_Get_Ptr8(test_case->query_string)); DECREF(hits); DECREF(parsed); DECREF(tree); DECREF(test_case); } // Run logical tests with default boolop of AND. 
for (i = 0; logical_test_funcs[i] != NULL; i++) { Lucy_TestQPLogic_Logical_Test_t test_func = logical_test_funcs[i]; TestQueryParser *test_case = test_func(BOOLOP_AND); Query *tree = QParser_Tree(and_parser, test_case->query_string); Query *parsed = QParser_Parse(and_parser, test_case->query_string); Hits *hits = IxSearcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL); TEST_TRUE(batch, Query_Equals(tree, (Obj*)test_case->tree), "tree() AND %s", (char*)CB_Get_Ptr8(test_case->query_string)); TEST_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits, "hits: AND %s", (char*)CB_Get_Ptr8(test_case->query_string)); DECREF(hits); DECREF(parsed); DECREF(tree); DECREF(test_case); } // Run tests for QParser_Prune(). for (i = 0; prune_test_funcs[i] != NULL; i++) { Lucy_TestQPLogic_Prune_Test_t test_func = prune_test_funcs[i]; TestQueryParser *test_case = test_func(); CharBuf *qstring = test_case->tree ? Query_To_String(test_case->tree) : CB_new_from_trusted_utf8("(NULL)", 6); Query *tree = test_case->tree; Query *wanted = test_case->expanded; Query *pruned = QParser_Prune(or_parser, tree); Query *expanded; Hits *hits; TEST_TRUE(batch, Query_Equals(pruned, (Obj*)wanted), "prune() %s", (char*)CB_Get_Ptr8(qstring)); expanded = QParser_Expand(or_parser, pruned); hits = IxSearcher_Hits(searcher, (Obj*)expanded, 0, 10, NULL); TEST_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits, "hits: %s", (char*)CB_Get_Ptr8(qstring)); DECREF(hits); DECREF(expanded); DECREF(pruned); DECREF(qstring); DECREF(test_case); } DECREF(and_parser); DECREF(or_parser); DECREF(searcher); DECREF(folder); DECREF(batch); }
// Return a new, empty CharBuf.  `self` is not consulted.
CharBuf*
TextSortCache_make_blank(TextSortCache *self) {
    UNUSED_VAR(self);
    CharBuf *blank = CB_new_from_trusted_utf8("", 0);
    return blank;
}
// Return the fixed string "[NOMATCH]" as a new CharBuf.  `self` is not
// consulted.
CharBuf*
NoMatchQuery_to_string(NoMatchQuery *self) {
    UNUSED_VAR(self);
    CharBuf *text = CB_new_from_trusted_utf8("[NOMATCH]", 9);
    return text;
}
void TestQPLogic_run_tests() { u32_t i; TestBatch *batch = Test_new_batch("TestQueryParserLogic", 178, NULL); Folder *folder = S_create_index(); Searcher *searcher = Searcher_new((Obj*)folder); QueryParser *or_parser = QParser_new(Searcher_Get_Schema(searcher), NULL, NULL, NULL); static ZombieCharBuf AND = ZCB_LITERAL("AND"); QueryParser *and_parser = QParser_new(Searcher_Get_Schema(searcher), NULL, (CharBuf*)&AND, NULL); QParser_Set_Heed_Colons(or_parser, true); QParser_Set_Heed_Colons(and_parser, true); PLAN(batch); /* Run logical tests with default boolop of OR. */ for (i = 0; logical_test_funcs[i] != NULL; i++) { kino_TestQPLogic_logical_test_t test_func = logical_test_funcs[i]; TestQueryParser *test_case = test_func(BOOLOP_OR); Query *tree = QParser_Tree(or_parser, test_case->query_string); Query *parsed = QParser_Parse(or_parser, test_case->query_string); Hits *hits = Searcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL); ASSERT_TRUE(batch, Query_Equals(tree, (Obj*)test_case->tree), "tree() OR %s", test_case->query_string->ptr); ASSERT_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits, "hits: OR %s", test_case->query_string->ptr); DECREF(hits); DECREF(parsed); DECREF(tree); DECREF(test_case); } /* Run logical tests with default boolop of AND. */ for (i = 0; logical_test_funcs[i] != NULL; i++) { kino_TestQPLogic_logical_test_t test_func = logical_test_funcs[i]; TestQueryParser *test_case = test_func(BOOLOP_AND); Query *tree = QParser_Tree(and_parser, test_case->query_string); Query *parsed = QParser_Parse(and_parser, test_case->query_string); Hits *hits = Searcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL); ASSERT_TRUE(batch, Query_Equals(tree, (Obj*)test_case->tree), "tree() AND %s", test_case->query_string->ptr); ASSERT_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits, "hits: AND %s", test_case->query_string->ptr); DECREF(hits); DECREF(parsed); DECREF(tree); DECREF(test_case); } /* Run tests for QParser_Prune(). 
*/ for (i = 0; prune_test_funcs[i] != NULL; i++) { kino_TestQPLogic_prune_test_t test_func = prune_test_funcs[i]; TestQueryParser *test_case = test_func(); CharBuf *qstring = test_case->tree ? Obj_To_String(test_case->tree) : CB_new_from_trusted_utf8("(NULL)", 6); Query *tree = test_case->tree; Query *wanted = test_case->expanded; Query *pruned = QParser_Prune(or_parser, tree); Query *expanded; Hits *hits; ASSERT_TRUE(batch, Query_Equals(pruned, (Obj*)wanted), "prune() %s", qstring->ptr); expanded = QParser_Expand(or_parser, pruned); hits = Searcher_Hits(searcher, (Obj*)expanded, 0, 10, NULL); ASSERT_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits, "hits: %s", qstring->ptr); DECREF(hits); DECREF(expanded); DECREF(pruned); DECREF(qstring); DECREF(test_case); } DECREF(and_parser); DECREF(or_parser); DECREF(searcher); DECREF(folder); batch->destroy(batch); }
// Drain this pool's postings and write them out term by term.
//
// Postings are pulled one at a time with PostPool_Fetch().  Consecutive
// postings whose blob text matches the remembered term text are treated as
// belonging to the same term.  Whenever the text changes, the finished
// term and its TermInfo are handed to the LexiconWriter and a fresh term is
// started on `post_writer`.  If `skip_stream` is non-NULL, a skip record is
// written each time the current term's doc_freq reaches a multiple of the
// skip interval.
//
// self        - pool supplying the postings, lexicon writer, skip stepper
//               and schema.
// post_writer - receives Start_Term / Write_Posting / Update_Skip_Info.
// skip_stream - destination for skip records; may be NULL to disable them.
static void
S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
                           OutStream *skip_stream) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    TermInfo *const tinfo = TInfo_new(0);
    TermInfo *const skip_tinfo = TInfo_new(0);
    TermInfoIVARS *const tinfo_ivars = TInfo_IVARS(tinfo);
    TermInfoIVARS *const skip_tinfo_ivars = TInfo_IVARS(skip_tinfo);
    LexiconWriter *const lex_writer = ivars->lex_writer;
    SkipStepper *const skip_stepper = ivars->skip_stepper;
    SkipStepperIVARS *const skip_stepper_ivars
        = SkipStepper_IVARS(skip_stepper);
    int32_t last_skip_doc = 0;
    int64_t last_skip_filepos = 0;
    const int32_t skip_interval
        = Arch_Skip_Interval(Schema_Get_Architecture(ivars->schema));

    // Prime heldover variables.  The first posting is fetched through
    // CERTIFY and its fields are read immediately, so it must not be NULL.
    RawPosting *posting
        = (RawPosting*)CERTIFY(PostPool_Fetch(self), RAWPOSTING);
    RawPostingIVARS *post_ivars = RawPost_IVARS(posting);
    CharBuf *last_term_text
        = CB_new_from_trusted_utf8(post_ivars->blob,
                                   post_ivars->content_len);
    const char *last_text_buf = CB_Get_Ptr8(last_term_text);
    uint32_t last_text_size = CB_Get_Size(last_term_text);
    SkipStepper_Set_ID_And_Filepos(skip_stepper, 0, 0);

    // Initialize sentinel to be used on the last iter, using an empty string
    // in order to make LexiconWriter Do The Right Thing.  The sentinel's
    // memory comes from alloca(), so it is never passed to DECREF.
    size_t sentinel_size = Class_Get_Obj_Alloc_Size(RAWPOSTING)
                           + 20;  // blob length + cushion
    char empty_string[] = "";
    RawPosting *sentinel = RawPost_new(alloca(sentinel_size), 0, 1,
                                       empty_string, 0);

    while (1) {
        bool same_text_as_last = true;

        if (posting == NULL) {
            // On the last iter, use an empty string to make LexiconWriter
            // DTRT.  Forcing same_text_as_last to false makes the block
            // below flush the final term before the loop exits.
            posting = sentinel;
            post_ivars = RawPost_IVARS(posting);
            same_text_as_last = false;
        }
        else {
            // Compare once.  Lengths are checked first so memcmp only runs
            // over buffers of equal size.
            if (post_ivars->content_len != last_text_size
                || memcmp(&post_ivars->blob, last_text_buf, last_text_size) != 0
               ) {
                same_text_as_last = false;
            }
        }

        // If the term text changes, process the last term.
        if (!same_text_as_last) {
            // Hand off to LexiconWriter.
            LexWriter_Add_Term(lex_writer, (Obj*)last_term_text, tinfo);

            // Start each term afresh.
            TInfo_Reset(tinfo);
            PostWriter_Start_Term(post_writer, tinfo);

            // Init skip data in preparation for the next term.
            skip_stepper_ivars->doc_id = 0;
            skip_stepper_ivars->filepos = tinfo_ivars->post_filepos;
            last_skip_doc = 0;
            last_skip_filepos = tinfo_ivars->post_filepos;

            // Remember the term_text so we can write string diffs.  The
            // cached pointer and size are refreshed after the CharBuf's
            // content has been replaced.
            CB_Mimic_Utf8(last_term_text, post_ivars->blob,
                          post_ivars->content_len);
            last_text_buf = CB_Get_Ptr8(last_term_text);
            last_text_size = CB_Get_Size(last_term_text);
        }

        // Bail on last iter before writing invalid posting data.
        if (posting == sentinel) {
            break;
        }

        // Write posting data.
        PostWriter_Write_Posting(post_writer, posting);

        // Doc freq lags by one iter.
        tinfo_ivars->doc_freq++;

        // Write skip data.  Only done for postings that continue the
        // current term, and only when doc_freq is a non-zero multiple of
        // skip_interval.
        if (skip_stream != NULL
            && same_text_as_last
            && tinfo_ivars->doc_freq % skip_interval == 0
            && tinfo_ivars->doc_freq != 0
           ) {
            // If first skip group, save skip stream pos for term info.
            if (tinfo_ivars->doc_freq == skip_interval) {
                tinfo_ivars->skip_filepos = OutStream_Tell(skip_stream);
            }
            // Write deltas.  The previous doc id / file position are saved
            // before the stepper is advanced to the current posting, and
            // are then passed to SkipStepper_Write_Record().
            last_skip_doc = skip_stepper_ivars->doc_id;
            last_skip_filepos = skip_stepper_ivars->filepos;
            skip_stepper_ivars->doc_id = post_ivars->doc_id;
            PostWriter_Update_Skip_Info(post_writer, skip_tinfo);
            skip_stepper_ivars->filepos = skip_tinfo_ivars->post_filepos;
            SkipStepper_Write_Record(skip_stepper, skip_stream,
                                     last_skip_doc, last_skip_filepos);
        }

        // Retrieve the next posting from the sort pool.
        // DECREF(posting);  // No!!  DON'T destroy!!!
        // NOTE(review): `posting` is compared against NULL at the top of
        // the loop, so this fetch is expected to yield NULL once the pool
        // is exhausted.  In that case post_ivars is reassigned from the
        // sentinel before it is dereferenced.
        posting = (RawPosting*)PostPool_Fetch(self);
        post_ivars = RawPost_IVARS(posting);
    }

    // Clean up.
    DECREF(last_term_text);
    DECREF(skip_tinfo);
    DECREF(tinfo);
}