コード例 #1
0
ファイル: getting_started.c プロジェクト: carriercomm/lucy
// Build a Doc holding a 'title' and a 'content' field from the given C
// strings, hand it to `indexer` (passing 1.0 as the third argument of
// Indexer_Add_Doc), then release the local reference to the Doc.
static void
S_add_document(Indexer *indexer, const char *title, const char *content) {
    Doc *new_doc = Doc_new(NULL, 0);

    // 'title' field: key and value strings are released right after the
    // store call.
    String *title_key = Str_newf("title");
    String *title_val = Str_new_from_utf8(title, strlen(title));
    Doc_Store(new_doc, title_key, (Obj*)title_val);
    DECREF(title_key);
    DECREF(title_val);

    // 'content' field, handled the same way.
    String *content_key = Str_newf("content");
    String *content_val = Str_new_from_utf8(content, strlen(content));
    Doc_Store(new_doc, content_key, (Obj*)content_val);
    DECREF(content_key);
    DECREF(content_val);

    Indexer_Add_Doc(indexer, new_doc, 1.0);

    DECREF(new_doc);
}
コード例 #2
0
ファイル: TestHighlighter.c プロジェクト: carriercomm/lucy
// Index three documents into a RAM folder, run one query against them and
// pass the searcher/query (and hits) to the three excerpt sub-tests.
static void
test_highlighting(TestBatchRunner *runner) {
    // Schema: two highlightable full-text fields built on one shared
    // tokenizer. The "alt" field's type additionally has its boost set
    // to 0.1.
    Schema *schema = Schema_new();
    StandardTokenizer *std_tokenizer = StandardTokenizer_new();
    FullTextType *content_type
        = FullTextType_new((Analyzer*)std_tokenizer);
    FullTextType_Set_Highlightable(content_type, true);
    FullTextType *alt_type = FullTextType_new((Analyzer*)std_tokenizer);
    FullTextType_Set_Highlightable(alt_type, true);
    FullTextType_Set_Boost(alt_type, 0.1f);
    String *content_field = (String*)SSTR_WRAP_UTF8("content", 7);
    Schema_Spec_Field(schema, content_field, (FieldType*)content_type);
    String *alt_field = (String*)SSTR_WRAP_UTF8("alt", 3);
    Schema_Spec_Field(schema, alt_field, (FieldType*)alt_type);
    DECREF(content_type);
    DECREF(alt_type);
    DECREF(std_tokenizer);

    RAMFolder *ram_folder = RAMFolder_new(NULL);
    Indexer *indexer = Indexer_new(schema, (Obj*)ram_folder, NULL, 0);

    // Document 1: TEST_STRING in "content".
    Doc *first_doc = Doc_new(NULL, 0);
    String *first_text
        = (String *)SSTR_WRAP_UTF8(TEST_STRING, TEST_STRING_LEN);
    Doc_Store(first_doc, content_field, (Obj*)first_text);
    Indexer_Add_Doc(indexer, first_doc, 1.0f);
    DECREF(first_doc);

    // Document 2: a short sentence in "content".
    Doc *second_doc = Doc_new(NULL, 0);
    String *second_text
        = (String *)SSTR_WRAP_UTF8("\"I see,\" said the blind man.", 28);
    Doc_Store(second_doc, content_field, (Obj*)second_text);
    Indexer_Add_Doc(indexer, second_doc, 1.0f);
    DECREF(second_doc);

    // Document 3: filler in "content", and TEST_STRING plus a 35-byte
    // suffix in "alt".
    Doc *third_doc = Doc_new(NULL, 0);
    String *third_content
        = (String *)SSTR_WRAP_UTF8("x but not why or 2ee", 20);
    Doc_Store(third_doc, content_field, (Obj*)third_content);
    String *third_alt
        = (String *)SSTR_WRAP_UTF8(TEST_STRING
                                   " and extra stuff so it scores lower",
                                   TEST_STRING_LEN + 35);
    Doc_Store(third_doc, alt_field, (Obj*)third_alt);
    Indexer_Add_Doc(indexer, third_doc, 1.0f);
    DECREF(third_doc);

    Indexer_Commit(indexer);
    DECREF(indexer);

    // Search the committed index and run the excerpt sub-tests.
    Searcher *searcher = (Searcher*)IxSearcher_new((Obj*)ram_folder);
    Obj *query = (Obj*)SSTR_WRAP_UTF8("\"x y z\" AND " PHI, 14);
    Hits *hits = Searcher_Hits(searcher, query, 0, 10, NULL);

    test_Raw_Excerpt(runner, searcher, query);
    test_Highlight_Excerpt(runner, searcher, query);
    test_Create_Excerpt(runner, searcher, query, hits);

    DECREF(hits);
    DECREF(searcher);
    DECREF(ram_folder);
    DECREF(schema);
}
コード例 #3
0
// Build a small in-memory index and return the folder that holds it.
//
// Each element of TestUtils_doc_set() is stored in the "content" field of
// its own Doc and added to an Indexer writing into a fresh RAMFolder.  The
// doc set, indexer and schema are released before returning.  The folder is
// returned without a DECREF here, so the caller is presumably responsible
// for releasing it -- confirm against callers.
//
// The parameter list is spelled `(void)` so the definition acts as a real
// prototype and calls with stray arguments are diagnosed.
static Folder*
S_create_index(void)
{
    Schema     *schema  = (Schema*)TestSchema_new();
    RAMFolder  *folder  = RAMFolder_new(NULL);
    VArray     *doc_set = TestUtils_doc_set();
    Indexer    *indexer = Indexer_new(schema, (Obj*)folder, NULL, NULL, 0);
    u32_t i, max;

    for (i = 0, max = VA_Get_Size(doc_set); i < max; i++) {
        // Field name used for every document; it has static storage, so it
        // is initialized once rather than on each iteration.
        static CharBuf field = ZCB_LITERAL("content");
        Doc *doc = Doc_new(NULL, 0);
        Doc_Store(doc, &field, VA_Fetch(doc_set, i));
        Indexer_Add_Doc(indexer, doc, 1.0f);
        DECREF(doc);
    }

    Indexer_Commit(indexer);

    DECREF(doc_set);
    DECREF(indexer);
    DECREF(schema);

    return (Folder*)folder;
}
コード例 #4
0
ファイル: TestHighlighter.c プロジェクト: carriercomm/lucy
// Index one long document made of "bla" filler sentences with the terms NNN
// and MMM embedded in it, then check that a 200-unit excerpt for the query
// "NNN MMM" still contains at least one of the two terms.
static void
test_hl_selection(TestBatchRunner *runner) {
    // Schema with a single highlightable full-text "content" field.
    Schema *schema = Schema_new();
    StandardTokenizer *std_tokenizer = StandardTokenizer_new();
    FullTextType *content_type
        = FullTextType_new((Analyzer*)std_tokenizer);
    FullTextType_Set_Highlightable(content_type, true);
    String *content_field = (String*)SSTR_WRAP_UTF8("content", 7);
    Schema_Spec_Field(schema, content_field, (FieldType*)content_type);
    DECREF(content_type);
    DECREF(std_tokenizer);

    RAMFolder *ram_folder = RAMFolder_new(NULL);
    Indexer *indexer = Indexer_new(schema, (Obj*)ram_folder, NULL, 0);

    static char sample_text[] =
        "bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla. "
        "bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla. "
        "bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla. "
        "bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla. "
        "bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla. "
        "bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla NNN bla. "
        "bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla. "
        "bla bla bla MMM bla bla bla bla bla bla bla bla bla bla bla bla. "
        "bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla. "
        "bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla. "
        "bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla. "
        "bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla. "
        "bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla bla. ";

    // Add the single document and commit.
    Doc *only_doc = Doc_new(NULL, 0);
    String *body
        = (String *)SSTR_WRAP_UTF8(sample_text, strlen(sample_text));
    Doc_Store(only_doc, content_field, (Obj*)body);
    Indexer_Add_Doc(indexer, only_doc, 1.0f);
    DECREF(only_doc);

    Indexer_Commit(indexer);
    DECREF(indexer);

    // Search, take the first hit and build its excerpt.
    Searcher *searcher = (Searcher*)IxSearcher_new((Obj*)ram_folder);
    Obj *query = (Obj*)SSTR_WRAP_UTF8("NNN MMM", 7);
    Highlighter *highlighter
        = Highlighter_new(searcher, query, content_field, 200);
    Hits *hits = Searcher_Hits(searcher, query, 0, 10, NULL);
    HitDoc *first_hit = Hits_Next(hits);
    String *excerpt = Highlighter_Create_Excerpt(highlighter, first_hit);

    String *term_mmm = (String*)SSTR_WRAP_UTF8("MMM", 3);
    String *term_nnn = (String*)SSTR_WRAP_UTF8("NNN", 3);
    TEST_TRUE(runner,
              Str_Find(excerpt, term_mmm) >= 0
              || Str_Find(excerpt, term_nnn) >= 0,
              "Sentence boundary algo doesn't chop terms");

    DECREF(excerpt);
    DECREF(first_hit);
    DECREF(hits);
    DECREF(highlighter);
    DECREF(searcher);
    DECREF(ram_folder);
    DECREF(schema);
}
コード例 #5
0
ファイル: TestSimple.c プロジェクト: kidaa/lucy
// Exercise the Simple API on a RAM folder: add documents one at a time,
// search after each add, re-open the Simple object, iterate over hits, and
// finally add a document under a second field.
static void
test_simple(TestBatchRunner *runner) {
    RAMFolder *ram_folder = RAMFolder_new(NULL);
    String    *language   = SSTR_WRAP_UTF8("en", 2);
    Simple    *lucy       = Simple_new((Obj*)ram_folder, language);

    String *food_field = SSTR_WRAP_UTF8("food", 4);

    // First document: one result expected immediately after the add.
    {
        Doc *entry = Doc_new(NULL, 0);
        String *text = SSTR_WRAP_UTF8("creamed corn", 12);
        Doc_Store(entry, food_field, (Obj*)text);
        Simple_Add_Doc(lucy, entry);
        DECREF(entry);

        String *terms = SSTR_WRAP_UTF8("creamed", 7);
        uint32_t hit_count = Simple_Search(lucy, terms, 0, 10);
        TEST_INT_EQ(runner, hit_count, 1, "Search works right after add");
    }

    // Second document: the same query is now expected to report two.
    {
        Doc *entry = Doc_new(NULL, 0);
        String *text = SSTR_WRAP_UTF8("creamed spinach", 15);
        Doc_Store(entry, food_field, (Obj*)text);
        Simple_Add_Doc(lucy, entry);
        DECREF(entry);

        String *terms = SSTR_WRAP_UTF8("creamed", 7);
        uint32_t hit_count = Simple_Search(lucy, terms, 0, 10);
        TEST_INT_EQ(runner, hit_count, 2, "Search returns total hits");
    }

    // Third document: release the Simple object and create a new one on
    // the same folder before searching, then walk the hits.
    {
        Doc *entry = Doc_new(NULL, 0);
        String *text = SSTR_WRAP_UTF8("creamed broccoli", 16);
        Doc_Store(entry, food_field, (Obj*)text);
        Simple_Add_Doc(lucy, entry);
        DECREF(entry);

        DECREF(lucy);
        lucy = Simple_new((Obj*)ram_folder, language);

        String *terms = SSTR_WRAP_UTF8("cream", 5);
        uint32_t hit_count = Simple_Search(lucy, terms, 0, 10);
        TEST_INT_EQ(runner, hit_count, 3, "commit upon destroy");

        for (HitDoc *hit = Simple_Next(lucy);
             hit != NULL;
             hit = Simple_Next(lucy)
            ) {
            String *food = (String*)HitDoc_Extract(hit, food_field);
            TEST_TRUE(runner, Str_Starts_With_Utf8(food, "cream", 5), "Next");
            DECREF(food);
            DECREF(hit);
        }
    }

    // Fourth document goes into a different field ("band"); the query is
    // expected to report four results.
    {
        Doc *entry = Doc_new(NULL, 0);
        String *band_field = SSTR_WRAP_UTF8("band", 4);
        String *text = SSTR_WRAP_UTF8("Cream", 5);
        Doc_Store(entry, band_field, (Obj*)text);
        Simple_Add_Doc(lucy, entry);
        DECREF(entry);

        String *terms = SSTR_WRAP_UTF8("cream", 5);
        uint32_t hit_count = Simple_Search(lucy, terms, 0, 10);
        TEST_INT_EQ(runner, hit_count, 4,
                    "Search uses correct EasyAnalyzer");
    }

    DECREF(lucy);
    DECREF(ram_folder);
}
コード例 #6
0
ファイル: TestQueryParserSyntax.c プロジェクト: kidaa/lucy
// Build an in-memory index over TestUtils_doc_set() and return its folder.
//
// The schema has two full-text fields:
//   "plain" - tokenized by a RegexTokenizer using the pattern \S+
//   "fancy" - a PolyAnalyzer made of a \w+ RegexTokenizer followed by a
//             SnowballStopFilter whose stop list contains only "x"
// Every string of the doc set is stored in both fields of one Doc.
//
// The folder is returned without a DECREF here, so the caller is presumably
// responsible for releasing it -- confirm against callers.
//
// The parameter list is spelled `(void)` so the definition acts as a real
// prototype and calls with stray arguments are diagnosed.
static Folder*
build_index(void) {
    // Plain type.
    String         *pattern   = Str_newf("\\S+");
    RegexTokenizer *tokenizer = RegexTokenizer_new(pattern);
    FullTextType   *plain     = FullTextType_new((Analyzer*)tokenizer);

    // Fancy type.

    String         *word_pattern   = Str_newf("\\w+");
    RegexTokenizer *word_tokenizer = RegexTokenizer_new(word_pattern);

    Hash *stop_list = Hash_new(0);
    Hash_Store_Utf8(stop_list, "x", 1, (Obj*)CFISH_TRUE);
    SnowballStopFilter *stop_filter = SnowStop_new(NULL, stop_list);

    // NOTE(review): word_tokenizer and stop_filter are not DECREF'd in the
    // clean-up section below; presumably Vec_Push takes over the caller's
    // reference -- confirm against the Clownfish Vector documentation.
    Vector *analyzers = Vec_new(0);
    Vec_Push(analyzers, (Obj*)word_tokenizer);
    Vec_Push(analyzers, (Obj*)stop_filter);
    PolyAnalyzer *fancy_analyzer = PolyAnalyzer_new(NULL, analyzers);

    FullTextType *fancy = FullTextType_new((Analyzer*)fancy_analyzer);

    // Schema.
    Schema *schema   = Schema_new();
    String *plain_str = Str_newf("plain");
    String *fancy_str = Str_newf("fancy");
    Schema_Spec_Field(schema, plain_str, (FieldType*)plain);
    Schema_Spec_Field(schema, fancy_str, (FieldType*)fancy);

    // Indexer.
    RAMFolder *folder  = RAMFolder_new(NULL);
    Indexer   *indexer = Indexer_new(schema, (Obj*)folder, NULL, 0);

    // Index documents: the same string goes into both fields.
    Vector *doc_set = TestUtils_doc_set();
    for (uint32_t i = 0; i < Vec_Get_Size(doc_set); ++i) {
        String *content_string = (String*)Vec_Fetch(doc_set, i);
        Doc *doc = Doc_new(NULL, 0);
        Doc_Store(doc, plain_str, (Obj*)content_string);
        Doc_Store(doc, fancy_str, (Obj*)content_string);
        Indexer_Add_Doc(indexer, doc, 1.0);
        DECREF(doc);
    }
    Indexer_Commit(indexer);

    // Clean up everything except the folder, which is the return value.
    DECREF(doc_set);
    DECREF(indexer);
    DECREF(fancy_str);
    DECREF(plain_str);
    DECREF(schema);
    DECREF(fancy);
    DECREF(fancy_analyzer);
    DECREF(analyzers);
    DECREF(stop_list);
    DECREF(word_pattern);
    DECREF(plain);
    DECREF(tokenizer);
    DECREF(pattern);

    return (Folder*)folder;
}
コード例 #7
0
ファイル: indexer.c プロジェクト: apache/lucy
// Store the NUL-terminated UTF-8 string `value` in `doc` under field `name`.
static void
S_store_utf8_field(Doc *doc, const char *name, const char *value) {
    String *field = Str_new_from_utf8(name, strlen(name));
    String *text  = Str_new_from_utf8(value, strlen(value));
    Doc_Store(doc, field, (Obj*)text);
    DECREF(field);
    DECREF(text);
}

// Read the file `filename` from the directory `uscon_source` and turn it
// into a Doc with four fields:
//   title    - the first line of the file
//   content  - the text matched after the title (bytes 0x01-0x7F)
//   url      - `filename` itself
//   category - "article", "amendment" or "preamble", chosen by the
//              filename prefix ("art", "amend", "preamble")
//
// Returns a new Doc.  Any failure (out of memory while building the path,
// file cannot be opened, title/body cannot be extracted, unknown filename
// prefix) prints a message to stderr and terminates the process with
// exit(1).
Doc*
S_parse_file(const char *filename) {
    // Room for "<uscon_source>/<filename>" plus the terminating NUL.
    size_t bytes = strlen(uscon_source) + 1 + strlen(filename) + 1;
    char *path = (char*)malloc(bytes);
    if (path == NULL) {
        fprintf(stderr, "Out of memory building path for '%s'\n", filename);
        exit(1);
    }
    snprintf(path, bytes, "%s/%s", uscon_source, filename);

    FILE *stream = fopen(path, "r");
    if (stream == NULL) {
        perror(path);
        exit(1);
    }

    // The 'm' modifier asks fscanf to allocate the destination buffers
    // itself (a POSIX extension); both are released with free() below.
    char *title    = NULL;
    char *bodytext = NULL;
    if (fscanf(stream, "%m[^\r\n] %m[\x01-\x7F]", &title, &bodytext) != 2) {
        fprintf(stderr, "Can't extract title/bodytext from '%s'", path);
        exit(1);
    }

    // Nothing more is read from the file, so close it right away.
    fclose(stream);

    const char *category = NULL;
    if (S_starts_with(filename, "art")) {
        category = "article";
    }
    else if (S_starts_with(filename, "amend")) {
        category = "amendment";
    }
    else if (S_starts_with(filename, "preamble")) {
        category = "preamble";
    }
    else {
        fprintf(stderr, "Can't derive category for %s", filename);
        exit(1);
    }

    Doc *doc = Doc_new(NULL, 0);
    S_store_utf8_field(doc, "title", title);
    S_store_utf8_field(doc, "content", bodytext);
    S_store_utf8_field(doc, "url", filename);
    S_store_utf8_field(doc, "category", category);

    free(bodytext);
    free(title);
    free(path);
    return doc;
}