예제 #1
0
/**
 * Serialize a Normalizer into a Hash.
 *
 * Starts from the hash produced by the parent class's Dump method and adds
 * three entries derived from the option bits in self->options:
 *   - "normalization_form": "NFKC", "NFC", "NFKD" or "NFD", chosen from the
 *     UTF8PROC_COMPOSE and UTF8PROC_COMPAT bits;
 *   - "case_fold": boolean for the UTF8PROC_CASEFOLD bit;
 *   - "strip_accents": boolean for the UTF8PROC_STRIPMARK bit.
 *
 * @return the hash obtained from the parent Dump, with the entries added.
 */
Hash*
Normalizer_dump(Normalizer *self) {
    Normalizer_Dump_t parent_dump
        = SUPER_METHOD_PTR(NORMALIZER, Lucy_Normalizer_Dump);
    Hash *dump = parent_dump(self);
    const int flags = self->options;
    CharBuf *form;

    // Pick the normalization form name from the compose/compat bits.
    if (flags & UTF8PROC_COMPOSE) {
        if (flags & UTF8PROC_COMPAT) {
            form = CB_new_from_trusted_utf8("NFKC", 4);
        }
        else {
            form = CB_new_from_trusted_utf8("NFC", 3);
        }
    }
    else {
        if (flags & UTF8PROC_COMPAT) {
            form = CB_new_from_trusted_utf8("NFKD", 4);
        }
        else {
            form = CB_new_from_trusted_utf8("NFD", 3);
        }
    }
    Hash_Store_Str(dump, "normalization_form", 18, (Obj*)form);

    Hash_Store_Str(dump, "case_fold", 9,
                   (Obj*)Bool_singleton(flags & UTF8PROC_CASEFOLD));

    Hash_Store_Str(dump, "strip_accents", 13,
                   (Obj*)Bool_singleton(flags & UTF8PROC_STRIPMARK));

    return dump;
}
예제 #2
0
파일: CharBuf.c 프로젝트: pavansondur/lucy
/**
 * Create a CharBuf from `size` bytes at `ptr`, validating them as UTF-8
 * first.
 *
 * When validation fails, DIE_INVALID_UTF8 is invoked with the same buffer
 * before construction is attempted.
 */
CharBuf*
CB_new_from_utf8(const char *ptr, size_t size) {
    const int is_valid = StrHelp_utf8_valid(ptr, size) ? 1 : 0;
    if (is_valid == 0) {
        DIE_INVALID_UTF8(ptr, size);
    }
    // Input has been checked, so the "trusted" constructor can be used.
    return CB_new_from_trusted_utf8(ptr, size);
}
예제 #3
0
파일: TestHash.c 프로젝트: pavansondur/lucy
/**
 * Verify that a Hash survives a Dump => Load round trip.
 *
 * Stores a single "foo" => "foo" entry, dumps the hash, loads the dump back
 * and checks the result for equality with the original.
 */
static void
test_Dump_and_Load(TestBatch *batch) {
    Hash    *original = Hash_new(0);
    CharBuf *value    = CB_new_from_trusted_utf8("foo", 3);
    Hash_Store_Str(original, "foo", 3, (Obj*)value);

    Obj  *dumped   = (Obj*)Hash_Dump(original);
    Hash *reloaded = (Hash*)Obj_Load(dumped, dumped);
    TEST_TRUE(batch, Hash_Equals(original, (Obj*)reloaded),
              "Dump => Load round trip");
    DECREF(dumped);
    DECREF(reloaded);

    /* TODO: Fix Hash_Load().  The following case is disabled until then:

    Hash_Store_Str(original, "_class", 6,
        (Obj*)CB_new_from_trusted_utf8("not_a_class", 11));
    dumped = (Obj*)Hash_Dump(original);
    reloaded = (Hash*)Obj_Load(dumped, dumped);

    TEST_TRUE(batch, Hash_Equals(original, (Obj*)reloaded),
              "Load still works with _class if it's not a real class");
    DECREF(dumped);
    DECREF(reloaded);

    */

    DECREF(original);
}
예제 #4
0
/**
 * Render a RangeQuery as "field:[lower TO upper]".
 *
 * A missing bound is shown as "*".  An inclusive bound uses a square
 * bracket, an exclusive bound a curly brace.
 *
 * @return a new CharBuf holding the formatted text.
 */
CharBuf*
RangeQuery_to_string(RangeQuery *self) {
    CharBuf *lower_text;
    CharBuf *upper_text;

    if (self->lower_term) {
        lower_text = Obj_To_String(self->lower_term);
    }
    else {
        lower_text = CB_new_from_trusted_utf8("*", 1);
    }

    if (self->upper_term) {
        upper_text = Obj_To_String(self->upper_term);
    }
    else {
        upper_text = CB_new_from_trusted_utf8("*", 1);
    }

    const char *open_bracket  = self->include_lower ? "[" : "{";
    const char *close_bracket = self->include_upper ? "]" : "}";
    CharBuf *result = CB_newf("%o:%s%o TO %o%s", self->field, open_bracket,
                              lower_text, upper_text, close_bracket);

    // The temporary bound strings are no longer needed once formatted.
    DECREF(upper_text);
    DECREF(lower_text);
    return result;
}
예제 #5
0
파일: Compiler.c 프로젝트: pavansondur/lucy
/**
 * Render a Compiler as "compiler(<parent query string>)".
 *
 * @return a new CharBuf holding the formatted text.
 */
CharBuf*
Compiler_to_string(Compiler *self) {
    CharBuf *query_text = Query_To_String(self->parent);
    CharBuf *result     = CB_new_from_trusted_utf8("compiler(", 9);

    // Append the parent query's text, then close the parenthesis.
    CB_Cat(result, query_text);
    DECREF(query_text);
    CB_Cat_Trusted_Str(result, ")", 1);

    return result;
}
예제 #6
0
/*
 * Build a test case for CB_catf()'s "%f64" directive.
 *
 * The expected text is produced with sprintf()'s "%g"; the actual text is
 * produced by appending to a buffer that starts out as "foo ".  Both are
 * wrapped in a TestCB which is pushed onto `tests`.
 */
static void
vcatf_f64(VArray *tests)
{
    char     expected_text[64];
    float    value  = 1.3f;
    CharBuf *actual = S_get_cb("foo ");
    CharBuf *expected;
    int      written;

    /* sprintf() returns the number of characters written, excluding the
     * terminating NUL, which is the length of the expected string. */
    written  = sprintf(expected_text, "foo bar %g baz", value);
    expected = CB_new_from_trusted_utf8(expected_text, (size_t)written);

    CB_catf(actual, "bar %f64 baz", value);
    VA_Push(tests, (Obj*)TestCB_new(expected, actual));
}
예제 #7
0
/**
 * Check CB_catf()'s "%f64" directive against sprintf()'s "%g".
 *
 * The expected text is formatted with the C library; the actual text is
 * built by appending to a buffer that starts out as "foo ".
 */
static void
test_vcatf_f64(TestBatch *batch) {
    char     expected_text[64];
    float    value  = 1.3f;
    CharBuf *actual = S_get_cb("foo ");

    // sprintf() returns the number of characters written, excluding the
    // terminating NUL, which is the length of the expected string.
    int written = sprintf(expected_text, "foo bar %g baz", value);
    CharBuf *expected
        = CB_new_from_trusted_utf8(expected_text, (size_t)written);

    CB_catf(actual, "bar %f64 baz", value);
    TEST_TRUE(batch, CB_Equals(expected, (Obj*)actual), "%%f64");

    DECREF(expected);
    DECREF(actual);
}
예제 #8
0
/*
 * Build a test case for CB_catf()'s "%x32" directive.
 *
 * The expected text is produced with sprintf(); the actual text is produced
 * by appending to a buffer that starts out as "foo ".  Both are wrapped in
 * a TestCB which is pushed onto `tests`.
 */
static void
vcatf_x32(VArray *tests)
{
    CharBuf *wanted;
    char buf[64];
    unsigned long num = I32_MAX;
    CharBuf *got = S_get_cb("foo ");

    /* `num` is an unsigned long, so "%lx" is the matching conversion
     * whatever the width of long or int.  Formatting unconditionally
     * removes the SIZEOF_LONG/SIZEOF_INT preprocessor branches, which left
     * `buf` uninitialized when neither type was exactly 4 bytes.  The value
     * is I32_MAX, so the 8-digit output is the same either way. */
    sprintf(buf, "foo bar %.8lx baz", num);

    wanted = CB_new_from_trusted_utf8(buf, strlen(buf));
    CB_catf(got, "bar %x32 baz", (u32_t)num);
    VA_Push(tests, (Obj*)TestCB_new(wanted, got));
}
예제 #9
0
/*
 * Convert an unsigned 32-bit number to its base 36 representation.
 *
 * Digits are produced least-significant first and written backwards into a
 * small stack buffer, so the finished string starts wherever the cursor
 * ends up.  Zero yields a single digit.
 *
 * Returns a new CharBuf holding the digits.
 */
CharBuf*
StrHelp_to_base36(u32_t num)
{
    char  digits[11];
    char *cursor = digits + 10;

    /* Terminate first; digits are then filled in right to left. */
    *cursor = '\0';

    do {
        cursor--;
        *cursor = base36_chars[ num % 36 ];
        num /= 36;
    } while (num != 0);

    return CB_new_from_trusted_utf8(cursor, strlen(cursor));
}
예제 #10
0
/**
 * Check CB_catf()'s "%x32" directive against sprintf().
 *
 * The expected text is formatted with the C library; the actual text is
 * built by appending to a buffer that starts out as "foo ".
 */
static void
test_vcatf_x32(TestBatch *batch) {
    CharBuf *wanted;
    char buf[64];
    unsigned long num = I32_MAX;
    CharBuf *got = S_get_cb("foo ");

    // `num` is an unsigned long, so "%lx" is the matching conversion
    // whatever the width of long or int.  Formatting unconditionally removes
    // the SIZEOF_LONG/SIZEOF_INT preprocessor branches, which left `buf`
    // uninitialized when neither type was exactly 4 bytes.  The value is
    // I32_MAX, so the 8-digit output is the same either way.
    sprintf(buf, "foo bar %.8lx baz", num);

    wanted = CB_new_from_trusted_utf8(buf, strlen(buf));
    CB_catf(got, "bar %x32 baz", (uint32_t)num);
    TEST_TRUE(batch, CB_Equals(wanted, (Obj*)got), "%%x32");
    DECREF(wanted);
    DECREF(got);
}
예제 #11
0
/*
 * Initialize an IndexManager.
 *
 * `host` is cloned when supplied; a NULL host is replaced by an empty
 * string.  A reference to `lock_factory` is taken via INCREF.  The folder
 * starts out NULL and every lock timeout/interval gets its default value.
 *
 * Returns `self`.
 */
IndexManager*
IxManager_init(IndexManager *self, const CharBuf *host,
               LockFactory *lock_factory)
{
    if (host) {
        self->host = CB_Clone(host);
    }
    else {
        self->host = CB_new_from_trusted_utf8("", 0);
    }
    self->lock_factory = (LockFactory*)INCREF(lock_factory);
    self->folder       = NULL;

    /* Write lock defaults. */
    self->write_lock_timeout  = 1000;
    self->write_lock_interval = 100;

    /* Merge lock defaults. */
    self->merge_lock_timeout  = 0;
    self->merge_lock_interval = 1000;

    /* Deletion lock defaults. */
    self->deletion_lock_timeout  = 1000;
    self->deletion_lock_interval = 100;

    return self;
}
예제 #12
0
파일: Json.c 프로젝트: pavansondur/lucy
/**
 * Parse a double-quoted JSON string.
 *
 * Scanning starts one byte past *json_ptr (presumably the opening quote --
 * confirm against callers) and stops at the first unescaped double quote
 * before `limit`.
 *
 * On success, *json_ptr is advanced to just past the closing quote and a new
 * CharBuf is returned.  If no closing quote is found, an error is recorded
 * via SET_ERROR and NULL is returned with *json_ptr untouched.  If the
 * string has no escapes but is not valid UTF-8, an error is set and NULL is
 * returned.
 */
static CharBuf*
S_parse_string(char **json_ptr, char *const limit) {
    char *const top       = *json_ptr + 1;
    char       *end       = NULL;
    bool_t      saw_backslash = false;

    // Locate the terminating double quote, noting whether any escapes were
    // encountered along the way.
    char *cursor = top;
    while (cursor < limit) {
        const char ch = *cursor;
        if (ch == '"') {
            end = cursor;
            break;
        }
        if (ch == '\\') {
            saw_backslash = true;
            // Step over the whole escape: 6 bytes for "\uXXXX", otherwise
            // the backslash plus the character it escapes.
            cursor += (cursor + 1 < limit && cursor[1] == 'u') ? 6 : 2;
        }
        else {
            cursor += 1;
        }
    }

    if (end == NULL) {
        SET_ERROR(CB_newf("Unterminated string"), *json_ptr, limit);
        return NULL;
    }

    // Move the caller's position to just beyond the closing quote.
    *json_ptr = end + 1;

    if (saw_backslash) {
        return S_unescape_text(top, end);
    }

    // Common case: no escapes, so the bytes can be used directly once they
    // have been validated.
    size_t len = end - top;
    if (StrHelp_utf8_valid(top, len)) {
        return CB_new_from_trusted_utf8(top, len);
    }

    CharBuf *mess = MAKE_MESS("Bad UTF-8 in JSON");
    Err_set_error(Err_new(mess));
    return NULL;
}
예제 #13
0
파일: Folder.c 프로젝트: pavansondur/lucy
/**
 * Initialize a Folder.
 *
 * Creates the entries hash and records the folder's path.  A NULL `path`
 * becomes an empty string; otherwise the path is cloned and a single
 * trailing DIR_SEP, if present, is removed.
 *
 * @return `self`.
 */
Folder*
Folder_init(Folder *self, const CharBuf *path) {
    self->entries = Hash_new(16);

    if (path != NULL) {
        // Keep a private copy with any trailing separator stripped.
        const size_t sep_len = strlen(DIR_SEP);
        self->path = CB_Clone(path);
        if (CB_Ends_With_Str(self->path, DIR_SEP, sep_len)) {
            CB_Chop(self->path, 1);
        }
    }
    else {
        self->path = CB_new_from_trusted_utf8("", 0);
    }

    ABSTRACT_CLASS_CHECK(self, FOLDER);
    return self;
}
예제 #14
0
파일: ORQuery.c 프로젝트: kidaa/lucy
/**
 * Render an ORQuery as its children's strings joined by " OR " and wrapped
 * in parentheses.  A query with no children renders as "()".
 *
 * @return a new String.
 */
String*
ORQuery_To_String_IMP(ORQuery *self) {
    ORQueryIVARS *const ivars = ORQuery_IVARS(self);
    const uint32_t child_count = Vec_Get_Size(ivars->children);

    if (child_count == 0) {
        return Str_new_from_trusted_utf8("()", 2);
    }

    CharBuf *accum = CB_new_from_trusted_utf8("(", 1);
    for (uint32_t tick = 0; tick < child_count; tick++) {
        // Every child after the first is preceded by the separator.
        if (tick > 0) {
            CB_Cat_Trusted_Utf8(accum, " OR ", 4);
        }
        String *child_text = Obj_To_String(Vec_Fetch(ivars->children, tick));
        CB_Cat(accum, child_text);
        DECREF(child_text);
    }
    CB_Cat_Trusted_Utf8(accum, ")", 1);

    String *result = CB_Yield_String(accum);
    DECREF(accum);
    return result;
}
예제 #15
0
void
TestQPLogic_run_tests() {
    uint32_t i;
    TestBatch     *batch      = TestBatch_new(258);
    Folder        *folder     = S_create_index();
    IndexSearcher *searcher   = IxSearcher_new((Obj*)folder);
    QueryParser   *or_parser  = QParser_new(IxSearcher_Get_Schema(searcher),
                                            NULL, NULL, NULL);
    ZombieCharBuf *AND        = ZCB_WRAP_STR("AND", 3);
    QueryParser   *and_parser = QParser_new(IxSearcher_Get_Schema(searcher),
                                            NULL, (CharBuf*)AND, NULL);
    QParser_Set_Heed_Colons(or_parser, true);
    QParser_Set_Heed_Colons(and_parser, true);

    TestBatch_Plan(batch);

    // Run logical tests with default boolop of OR.
    for (i = 0; logical_test_funcs[i] != NULL; i++) {
        Lucy_TestQPLogic_Logical_Test_t test_func = logical_test_funcs[i];
        TestQueryParser *test_case = test_func(BOOLOP_OR);
        Query *tree     = QParser_Tree(or_parser, test_case->query_string);
        Query *parsed   = QParser_Parse(or_parser, test_case->query_string);
        Hits  *hits     = IxSearcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL);

        TEST_TRUE(batch, Query_Equals(tree, (Obj*)test_case->tree),
                  "tree() OR   %s", (char*)CB_Get_Ptr8(test_case->query_string));
        TEST_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits,
                    "hits: OR   %s", (char*)CB_Get_Ptr8(test_case->query_string));
        DECREF(hits);
        DECREF(parsed);
        DECREF(tree);
        DECREF(test_case);
    }

    // Run logical tests with default boolop of AND.
    for (i = 0; logical_test_funcs[i] != NULL; i++) {
        Lucy_TestQPLogic_Logical_Test_t test_func = logical_test_funcs[i];
        TestQueryParser *test_case = test_func(BOOLOP_AND);
        Query *tree     = QParser_Tree(and_parser, test_case->query_string);
        Query *parsed   = QParser_Parse(and_parser, test_case->query_string);
        Hits  *hits     = IxSearcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL);

        TEST_TRUE(batch, Query_Equals(tree, (Obj*)test_case->tree),
                  "tree() AND   %s", (char*)CB_Get_Ptr8(test_case->query_string));
        TEST_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits,
                    "hits: AND   %s", (char*)CB_Get_Ptr8(test_case->query_string));
        DECREF(hits);
        DECREF(parsed);
        DECREF(tree);
        DECREF(test_case);
    }

    // Run tests for QParser_Prune().
    for (i = 0; prune_test_funcs[i] != NULL; i++) {
        Lucy_TestQPLogic_Prune_Test_t test_func = prune_test_funcs[i];
        TestQueryParser *test_case = test_func();
        CharBuf *qstring = test_case->tree
                           ? Query_To_String(test_case->tree)
                           : CB_new_from_trusted_utf8("(NULL)", 6);
        Query *tree = test_case->tree;
        Query *wanted = test_case->expanded;
        Query *pruned   = QParser_Prune(or_parser, tree);
        Query *expanded;
        Hits  *hits;

        TEST_TRUE(batch, Query_Equals(pruned, (Obj*)wanted),
                  "prune()   %s", (char*)CB_Get_Ptr8(qstring));
        expanded = QParser_Expand(or_parser, pruned);
        hits = IxSearcher_Hits(searcher, (Obj*)expanded, 0, 10, NULL);
        TEST_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits,
                    "hits:    %s", (char*)CB_Get_Ptr8(qstring));

        DECREF(hits);
        DECREF(expanded);
        DECREF(pruned);
        DECREF(qstring);
        DECREF(test_case);
    }

    DECREF(and_parser);
    DECREF(or_parser);
    DECREF(searcher);
    DECREF(folder);
    DECREF(batch);
}
예제 #16
0
/*
 * Return a new, empty CharBuf.  `self` is not consulted.
 */
CharBuf*
TextSortCache_make_blank(TextSortCache *self)
{
    CharBuf *blank = CB_new_from_trusted_utf8("", 0);
    UNUSED_VAR(self);
    return blank;
}
예제 #17
0
/**
 * Return the fixed text "[NOMATCH]" as a new CharBuf.  `self` is not
 * consulted.
 */
CharBuf*
NoMatchQuery_to_string(NoMatchQuery *self) {
    CharBuf *text = CB_new_from_trusted_utf8("[NOMATCH]", 9);
    UNUSED_VAR(self);
    return text;
}
/*
 * Run the QueryParser logic tests.
 *
 * Builds an index with S_create_index(), opens a Searcher on it, and creates
 * two parsers: one constructed with all-NULL optional arguments and one
 * constructed with "AND" passed as the third argument.  Every function in
 * logical_test_funcs is exercised against both parsers, and every function
 * in prune_test_funcs is run through QParser_Prune()/QParser_Expand() on the
 * first parser.  Each case asserts on the resulting query tree and on the
 * number of hits the searcher returns.
 *
 * NOTE(review): 178 is presumably the planned assertion count and must be
 * kept in step with the test tables -- confirm against Test_new_batch().
 */
void
TestQPLogic_run_tests()
{
    u32_t i;
    TestBatch   *batch = Test_new_batch("TestQueryParserLogic", 178, NULL);
    Folder      *folder       = S_create_index();
    Searcher    *searcher     = Searcher_new((Obj*)folder);
    QueryParser *or_parser    = QParser_new(Searcher_Get_Schema(searcher), 
        NULL, NULL, NULL);
    static  ZombieCharBuf AND = ZCB_LITERAL("AND");
    QueryParser *and_parser   = QParser_new(Searcher_Get_Schema(searcher), 
        NULL, (CharBuf*)&AND, NULL);
    QParser_Set_Heed_Colons(or_parser, true);
    QParser_Set_Heed_Colons(and_parser, true);

    PLAN(batch);

    /* Run logical tests with default boolop of OR. */
    for (i = 0; logical_test_funcs[i] != NULL; i++) {
        kino_TestQPLogic_logical_test_t test_func = logical_test_funcs[i];
        TestQueryParser *test_case = test_func(BOOLOP_OR);
        Query *tree     = QParser_Tree(or_parser, test_case->query_string);
        Query *parsed   = QParser_Parse(or_parser, test_case->query_string);
        Hits  *hits     = Searcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL);

        /* Two assertions per case: tree shape, then hit count. */
        ASSERT_TRUE(batch, Query_Equals(tree, (Obj*)test_case->tree),
            "tree() OR   %s", test_case->query_string->ptr);
        ASSERT_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits,
            "hits: OR   %s", test_case->query_string->ptr);
        DECREF(hits);
        DECREF(parsed);
        DECREF(tree);
        DECREF(test_case);
    }

    /* Run logical tests with default boolop of AND. */
    for (i = 0; logical_test_funcs[i] != NULL; i++) {
        kino_TestQPLogic_logical_test_t test_func = logical_test_funcs[i];
        TestQueryParser *test_case = test_func(BOOLOP_AND);
        Query *tree     = QParser_Tree(and_parser, test_case->query_string);
        Query *parsed   = QParser_Parse(and_parser, test_case->query_string);
        Hits  *hits     = Searcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL);

        /* Two assertions per case: tree shape, then hit count. */
        ASSERT_TRUE(batch, Query_Equals(tree, (Obj*)test_case->tree),
            "tree() AND   %s", test_case->query_string->ptr);
        ASSERT_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits,
            "hits: AND   %s", test_case->query_string->ptr);
        DECREF(hits);
        DECREF(parsed);
        DECREF(tree);
        DECREF(test_case);
    }

    /* Run tests for QParser_Prune(). */
    for (i = 0; prune_test_funcs[i] != NULL; i++) {
        kino_TestQPLogic_prune_test_t test_func = prune_test_funcs[i];
        TestQueryParser *test_case = test_func();
        /* Label for the assertion messages; a case may have no tree. */
        CharBuf *qstring = test_case->tree 
                         ? Obj_To_String(test_case->tree)
                         : CB_new_from_trusted_utf8("(NULL)", 6);
        Query *tree = test_case->tree;
        Query *wanted = test_case->expanded;
        Query *pruned   = QParser_Prune(or_parser, tree);
        Query *expanded;
        Hits  *hits;

        ASSERT_TRUE(batch, Query_Equals(pruned, (Obj*)wanted),
            "prune()   %s", qstring->ptr);
        /* The hit count is checked on the expanded form of the pruned
         * query. */
        expanded = QParser_Expand(or_parser, pruned);
        hits = Searcher_Hits(searcher, (Obj*)expanded, 0, 10, NULL);
        ASSERT_INT_EQ(batch, Hits_Total_Hits(hits), test_case->num_hits,
            "hits:    %s", qstring->ptr);

        DECREF(hits);
        DECREF(expanded);
        DECREF(pruned);
        DECREF(qstring);
        DECREF(test_case);
    }

    DECREF(and_parser);
    DECREF(or_parser);
    DECREF(searcher);
    DECREF(folder);
    batch->destroy(batch);
}
예제 #19
0
/**
 * Drain this PostingPool, writing every posting and every distinct term.
 *
 * Postings are pulled one at a time with PostPool_Fetch().  Each one is
 * written through `post_writer`; whenever the term text differs from the
 * previous posting's, the finished term and its TermInfo are handed to the
 * pool's LexiconWriter and a new term is started.  A sentinel posting with
 * empty text is substituted once the pool runs dry so that the final term
 * is flushed the same way.
 *
 * @param self         The pool to drain.
 * @param post_writer  Receives the postings and term boundaries.
 * @param skip_stream  When non-NULL, skip records are written to it every
 *                     skip_interval postings within a term.
 */
static void
S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
                           OutStream *skip_stream) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    TermInfo      *const tinfo            = TInfo_new(0);
    TermInfo      *const skip_tinfo       = TInfo_new(0);
    TermInfoIVARS *const tinfo_ivars      = TInfo_IVARS(tinfo);
    TermInfoIVARS *const skip_tinfo_ivars = TInfo_IVARS(skip_tinfo);
    LexiconWriter *const lex_writer       = ivars->lex_writer;
    SkipStepper   *const skip_stepper     = ivars->skip_stepper;
    SkipStepperIVARS *const skip_stepper_ivars
        = SkipStepper_IVARS(skip_stepper);
    int32_t        last_skip_doc          = 0;
    int64_t        last_skip_filepos      = 0;
    const int32_t  skip_interval
        = Arch_Skip_Interval(Schema_Get_Architecture(ivars->schema));

    // Prime heldover variables.
    // The "last term" text starts out as a copy of the first posting's text,
    // so the first posting normally compares equal to it in the loop below.
    RawPosting *posting
        = (RawPosting*)CERTIFY(PostPool_Fetch(self), RAWPOSTING);
    RawPostingIVARS *post_ivars = RawPost_IVARS(posting);
    CharBuf *last_term_text
        = CB_new_from_trusted_utf8(post_ivars->blob, post_ivars->content_len);
    const char *last_text_buf  = CB_Get_Ptr8(last_term_text);
    uint32_t    last_text_size = CB_Get_Size(last_term_text);
    SkipStepper_Set_ID_And_Filepos(skip_stepper, 0, 0);

    // Initialize sentinel to be used on the last iter, using an empty string
    // in order to make LexiconWriter Do The Right Thing.
    // The sentinel's storage comes from alloca(), and it is not DECREF'd
    // in this function.
    size_t sentinel_size = Class_Get_Obj_Alloc_Size(RAWPOSTING)
                           + 20;  // blob length + cushion
    char empty_string[] = "";
    RawPosting *sentinel = RawPost_new(alloca(sentinel_size), 0, 1,
                                       empty_string, 0);

    while (1) {
        bool same_text_as_last = true;

        if (posting == NULL) {
            // On the last iter, use an empty string to make LexiconWriter
            // DTRT.
            posting = sentinel;
            post_ivars = RawPost_IVARS(posting);
            same_text_as_last = false;
        }
        else {
            // Compare once.
            // Lengths are checked first so memcmp only runs on equal-sized
            // texts.
            if (post_ivars->content_len != last_text_size
                || memcmp(&post_ivars->blob, last_text_buf, last_text_size) != 0
               ) {
                same_text_as_last = false;
            }
        }

        // If the term text changes, process the last term.
        if (!same_text_as_last) {
            // Hand off to LexiconWriter.
            LexWriter_Add_Term(lex_writer, (Obj*)last_term_text, tinfo);

            // Start each term afresh.
            TInfo_Reset(tinfo);
            PostWriter_Start_Term(post_writer, tinfo);

            // Init skip data in preparation for the next term.
            skip_stepper_ivars->doc_id  = 0;
            skip_stepper_ivars->filepos = tinfo_ivars->post_filepos;
            last_skip_doc         = 0;
            last_skip_filepos     = tinfo_ivars->post_filepos;

            // Remember the term_text so we can write string diffs.
            // The cached pointer and size are refreshed because they refer
            // to the CharBuf's contents, which were just replaced.
            CB_Mimic_Utf8(last_term_text, post_ivars->blob,
                          post_ivars->content_len);
            last_text_buf  = CB_Get_Ptr8(last_term_text);
            last_text_size = CB_Get_Size(last_term_text);
        }

        // Bail on last iter before writing invalid posting data.
        if (posting == sentinel) { break; }

        // Write posting data.
        PostWriter_Write_Posting(post_writer, posting);

        // Doc freq lags by one iter.
        tinfo_ivars->doc_freq++;

        //  Write skip data.
        // Only done when a skip stream was supplied, the posting continues
        // the current term, and doc_freq is a non-zero multiple of
        // skip_interval.
        if (skip_stream != NULL
            && same_text_as_last
            && tinfo_ivars->doc_freq % skip_interval == 0
            && tinfo_ivars->doc_freq != 0
           ) {
            // If first skip group, save skip stream pos for term info.
            if (tinfo_ivars->doc_freq == skip_interval) {
                tinfo_ivars->skip_filepos = OutStream_Tell(skip_stream);
            }
            // Write deltas.
            // The stepper's previous doc id and file position are saved
            // before being overwritten, then passed along with the new
            // record.
            last_skip_doc               = skip_stepper_ivars->doc_id;
            last_skip_filepos           = skip_stepper_ivars->filepos;
            skip_stepper_ivars->doc_id  = post_ivars->doc_id;
            PostWriter_Update_Skip_Info(post_writer, skip_tinfo);
            skip_stepper_ivars->filepos = skip_tinfo_ivars->post_filepos;
            SkipStepper_Write_Record(skip_stepper, skip_stream,
                                     last_skip_doc, last_skip_filepos);
        }

        // Retrieve the next posting from the sort pool.
        // DECREF(posting);  // No!!  DON'T destroy!!!

        // A NULL fetch result is handled at the top of the loop, where
        // post_ivars is re-derived from the sentinel before any use.
        posting = (RawPosting*)PostPool_Fetch(self);
        post_ivars = RawPost_IVARS(posting);
    }

    // Clean up.
    DECREF(last_term_text);
    DECREF(skip_tinfo);
    DECREF(tinfo);
}