Code example #1
File: TestRegexTokenizer.c  Project: hernan604/lucy
static void
test_Dump_Load_and_Equals(TestBatchRunner *runner) {
    if (!RegexTokenizer_is_available()) {
        // Skip once for each of the three test assertions planned below.
        SKIP(runner, "RegexTokenizer not available");
        SKIP(runner, "RegexTokenizer not available");
        SKIP(runner, "RegexTokenizer not available");
        return;
    }

    // "\w+" matches runs of word characters; "\S+" splits on whitespace.
    StackString *word_char_pattern  = SSTR_WRAP_UTF8("\\w+", 3);
    StackString *whitespace_pattern = SSTR_WRAP_UTF8("\\S+", 3);
    RegexTokenizer *word_char_tokenizer
        = RegexTokenizer_new((String*)word_char_pattern);
    RegexTokenizer *whitespace_tokenizer
        = RegexTokenizer_new((String*)whitespace_pattern);
    Obj *word_char_dump  = RegexTokenizer_Dump(word_char_tokenizer);
    Obj *whitespace_dump = RegexTokenizer_Dump(whitespace_tokenizer);
    // Load() returns a generic Obj*, so cast back to RegexTokenizer*.
    RegexTokenizer *word_char_clone
        = (RegexTokenizer*)RegexTokenizer_Load(whitespace_tokenizer, word_char_dump);
    RegexTokenizer *whitespace_clone
        = (RegexTokenizer*)RegexTokenizer_Load(whitespace_tokenizer, whitespace_dump);

    TEST_FALSE(runner,
               RegexTokenizer_Equals(word_char_tokenizer, (Obj*)whitespace_tokenizer),
               "Equals() false with different pattern");
    TEST_TRUE(runner,
              RegexTokenizer_Equals(word_char_tokenizer, (Obj*)word_char_clone),
              "Dump => Load round trip");
    TEST_TRUE(runner,
              RegexTokenizer_Equals(whitespace_tokenizer, (Obj*)whitespace_clone),
              "Dump => Load round trip");

    DECREF(word_char_tokenizer);
    DECREF(word_char_dump);
    DECREF(word_char_clone);
    DECREF(whitespace_tokenizer);
    DECREF(whitespace_dump);
    DECREF(whitespace_clone);
}
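
Note: Dump() serializes the tokenizer into a generic Obj, and Load() rebuilds one from that dump. The receiver passed to Load() (whitespace_tokenizer in both calls) appears to act only as a factory, since the test expects word_char_clone to equal word_char_tokenizer rather than the receiver. The Equals() assertions verify both the round trip and that tokenizers built from different patterns compare unequal.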
Code example #2
File: PolyAnalyzer.c  Project: pavansondur/lucy
PolyAnalyzer*
PolyAnalyzer_init(PolyAnalyzer *self, const CharBuf *language,
                  VArray *analyzers) {
    Analyzer_init((Analyzer*)self);
    if (analyzers) {
        // Caller-supplied chain: verify that each element is an Analyzer.
        for (uint32_t i = 0, max = VA_Get_Size(analyzers); i < max; i++) {
            CERTIFY(VA_Fetch(analyzers, i), ANALYZER);
        }
        self->analyzers = (VArray*)INCREF(analyzers);
    }
    else if (language) {
        // Default chain: case folding, tokenizing, then Snowball stemming.
        self->analyzers = VA_new(3);
        VA_Push(self->analyzers, (Obj*)CaseFolder_new());
        VA_Push(self->analyzers, (Obj*)RegexTokenizer_new(NULL));
        VA_Push(self->analyzers, (Obj*)SnowStemmer_new(language));
    }
    else {
        THROW(ERR, "Must specify either 'language' or 'analyzers'");
    }

    return self;
}
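
Note: this revision uses the older Lucy API, with CharBuf strings, VArray arrays, and direct struct access (self->analyzers). Code example #3 below is the same constructor from a later revision, after these types became String and Vector and instance variables moved behind the PolyAnalyzer_IVARS() accessor.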
Code example #3
File: PolyAnalyzer.c  Project: carriercomm/lucy
PolyAnalyzer*
PolyAnalyzer_init(PolyAnalyzer *self, String *language,
                  Vector *analyzers) {
    Analyzer_init((Analyzer*)self);
    // Instance variables are reached through the IVARS accessor in this revision.
    PolyAnalyzerIVARS *const ivars = PolyAnalyzer_IVARS(self);

    if (analyzers) {
        for (uint32_t i = 0, max = Vec_Get_Size(analyzers); i < max; i++) {
            CERTIFY(Vec_Fetch(analyzers, i), ANALYZER);
        }
        ivars->analyzers = (Vector*)INCREF(analyzers);
    }
    else if (language) {
        ivars->analyzers = Vec_new(3);
        Vec_Push(ivars->analyzers, (Obj*)CaseFolder_new());
        Vec_Push(ivars->analyzers, (Obj*)RegexTokenizer_new(NULL));
        Vec_Push(ivars->analyzers, (Obj*)SnowStemmer_new(language));
    }
    else {
        THROW(ERR, "Must specify either 'language' or 'analyzers'");
    }

    return self;
}
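
For context, a minimal usage sketch of the two initialization paths above. It assumes the modern API of example #3 and the public constructor PolyAnalyzer_new(language, analyzers) seen in code example #4; error handling is omitted.

// Minimal sketch, assuming the modern API from example #3.
// Path 1: pass a language and let init build the default three-stage chain.
String *lang = Str_newf("en");
PolyAnalyzer *by_language = PolyAnalyzer_new(lang, NULL);

// Path 2: supply an explicit analyzer chain, which init certifies and retains.
Vector *chain = Vec_new(1);
Vec_Push(chain, (Obj*)RegexTokenizer_new(NULL));
PolyAnalyzer *by_chain = PolyAnalyzer_new(NULL, chain);

DECREF(by_chain);
DECREF(chain);
DECREF(by_language);
DECREF(lang);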
Code example #4
File: TestQueryParserSyntax.c  Project: kidaa/lucy
static Folder*
build_index() {
    // Plain type.
    String         *pattern   = Str_newf("\\S+");
    RegexTokenizer *tokenizer = RegexTokenizer_new(pattern);
    FullTextType   *plain     = FullTextType_new((Analyzer*)tokenizer);

    // Fancy type.
    String         *word_pattern   = Str_newf("\\w+");
    RegexTokenizer *word_tokenizer = RegexTokenizer_new(word_pattern);

    // Stoplist containing the single stopword "x".
    Hash *stop_list = Hash_new(0);
    Hash_Store_Utf8(stop_list, "x", 1, (Obj*)CFISH_TRUE);
    SnowballStopFilter *stop_filter = SnowStop_new(NULL, stop_list);

    Vector *analyzers = Vec_new(0);
    Vec_Push(analyzers, (Obj*)word_tokenizer);
    Vec_Push(analyzers, (Obj*)stop_filter);
    PolyAnalyzer *fancy_analyzer = PolyAnalyzer_new(NULL, analyzers);

    FullTextType *fancy = FullTextType_new((Analyzer*)fancy_analyzer);

    // Schema.
    Schema *schema   = Schema_new();
    String *plain_str = Str_newf("plain");
    String *fancy_str = Str_newf("fancy");
    Schema_Spec_Field(schema, plain_str, (FieldType*)plain);
    Schema_Spec_Field(schema, fancy_str, (FieldType*)fancy);

    // Indexer.
    RAMFolder *folder  = RAMFolder_new(NULL);
    Indexer   *indexer = Indexer_new(schema, (Obj*)folder, NULL, 0);

    // Index documents.
    Vector *doc_set = TestUtils_doc_set();
    for (uint32_t i = 0; i < Vec_Get_Size(doc_set); ++i) {
        String *content_string = (String*)Vec_Fetch(doc_set, i);
        Doc *doc = Doc_new(NULL, 0);
        Doc_Store(doc, plain_str, (Obj*)content_string);
        Doc_Store(doc, fancy_str, (Obj*)content_string);
        Indexer_Add_Doc(indexer, doc, 1.0);
        DECREF(doc);
    }
    Indexer_Commit(indexer);

    // Clean up.
    DECREF(doc_set);
    DECREF(indexer);
    DECREF(fancy_str);
    DECREF(plain_str);
    DECREF(schema);
    DECREF(fancy);
    DECREF(fancy_analyzer);
    DECREF(analyzers);
    DECREF(stop_list);
    DECREF(word_pattern);
    DECREF(plain);
    DECREF(tokenizer);
    DECREF(pattern);

    return (Folder*)folder;
}
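
To show how build_index() is typically consumed, a hedged sketch of searching the returned folder follows. IxSearcher_new() and the five-argument IxSearcher_Hits() mirror patterns from Lucy's C test suite; treat the exact names and signatures as assumptions, not as part of this file.

// Sketch only -- searcher API assumed from Lucy's C test suite.
Folder *index = build_index();
IndexSearcher *searcher = IxSearcher_new((Obj*)index);
String *query_string = Str_newf("foo");
Hits *hits = IxSearcher_Hits(searcher, (Obj*)query_string, 0, 10, NULL);
HitDoc *hit;
while ((hit = Hits_Next(hits)) != NULL) {
    // Each hit exposes the stored "plain" and "fancy" fields.
    DECREF(hit);
}
DECREF(hits);
DECREF(query_string);
DECREF(searcher);
DECREF(index);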