Пример #1
0
static void
test_Dump_Load_and_Equals(TestBatchRunner *runner) {
    String *EN = (String*)SSTR_WRAP_UTF8("en", 2);
    String *ES = (String*)SSTR_WRAP_UTF8("es", 2);
    SnowballStemmer *stemmer = SnowStemmer_new(EN);
    SnowballStemmer *other   = SnowStemmer_new(ES);
    Obj *dump       = (Obj*)SnowStemmer_Dump(stemmer);
    Obj *other_dump = (Obj*)SnowStemmer_Dump(other);
    SnowballStemmer *clone       = (SnowballStemmer*)SnowStemmer_Load(other, dump);
    SnowballStemmer *other_clone = (SnowballStemmer*)SnowStemmer_Load(other, other_dump);

    TEST_FALSE(runner,
               SnowStemmer_Equals(stemmer, (Obj*)other),
               "Equals() false with different language");
    TEST_TRUE(runner,
              SnowStemmer_Equals(stemmer, (Obj*)clone),
              "Dump => Load round trip");
    TEST_TRUE(runner,
              SnowStemmer_Equals(other, (Obj*)other_clone),
              "Dump => Load round trip");

    DECREF(stemmer);
    DECREF(dump);
    DECREF(clone);
    DECREF(other);
    DECREF(other_dump);
    DECREF(other_clone);
}
Пример #2
0
static void
test_stemming(TestBatchRunner *runner) {
    FSFolder *modules_folder = TestUtils_modules_folder();
    String *path = Str_newf("analysis/snowstem/source/test/tests.json");
    Hash *tests = (Hash*)Json_slurp_json((Folder*)modules_folder, path);
    if (!tests) { RETHROW(Err_get_error()); }

    String *iso;
    Hash *lang_data;
    Hash_Iterate(tests);
    while (Hash_Next(tests, (Obj**)&iso, (Obj**)&lang_data)) {
        VArray *words = (VArray*)Hash_Fetch_Utf8(lang_data, "words", 5);
        VArray *stems = (VArray*)Hash_Fetch_Utf8(lang_data, "stems", 5);
        SnowballStemmer *stemmer = SnowStemmer_new(iso);
        for (uint32_t i = 0, max = VA_Get_Size(words); i < max; i++) {
            String *word  = (String*)VA_Fetch(words, i);
            VArray *got   = SnowStemmer_Split(stemmer, word);
            String *stem  = (String*)VA_Fetch(got, 0);
            TEST_TRUE(runner,
                      stem
                      && Str_Is_A(stem, STRING)
                      && Str_Equals(stem, VA_Fetch(stems, i)),
                      "Stem %s: %s", Str_Get_Ptr8(iso), Str_Get_Ptr8(word)
                     );
            DECREF(got);
        }
        DECREF(stemmer);
    }

    DECREF(tests);
    DECREF(modules_folder);
    DECREF(path);
}
Пример #3
0
EasyAnalyzer*
EasyAnalyzer_init(EasyAnalyzer *self, const CharBuf *language) {
    Analyzer_init((Analyzer*)self);
    EasyAnalyzerIVARS *const ivars = EasyAnalyzer_IVARS(self);
    ivars->language   = CB_Clone(language);
    ivars->tokenizer  = StandardTokenizer_new();
    ivars->normalizer = Normalizer_new(NULL, true, false);
    ivars->stemmer    = SnowStemmer_new(language);
    return self;
}
Пример #4
0
PolyAnalyzer*
PolyAnalyzer_init(PolyAnalyzer *self, const CharBuf *language,
                  VArray *analyzers) {
    Analyzer_init((Analyzer*)self);
    if (analyzers) {
        for (uint32_t i = 0, max = VA_Get_Size(analyzers); i < max; i++) {
            CERTIFY(VA_Fetch(analyzers, i), ANALYZER);
        }
        self->analyzers = (VArray*)INCREF(analyzers);
    }
    else if (language) {
        self->analyzers = VA_new(3);
        VA_Push(self->analyzers, (Obj*)CaseFolder_new());
        VA_Push(self->analyzers, (Obj*)RegexTokenizer_new(NULL));
        VA_Push(self->analyzers, (Obj*)SnowStemmer_new(language));
    }
    else {
        THROW(ERR, "Must specify either 'language' or 'analyzers'");
    }

    return self;
}
Пример #5
0
PolyAnalyzer*
PolyAnalyzer_init(PolyAnalyzer *self, String *language,
                  Vector *analyzers) {
    Analyzer_init((Analyzer*)self);
    PolyAnalyzerIVARS *const ivars = PolyAnalyzer_IVARS(self);

    if (analyzers) {
        for (uint32_t i = 0, max = Vec_Get_Size(analyzers); i < max; i++) {
            CERTIFY(Vec_Fetch(analyzers, i), ANALYZER);
        }
        ivars->analyzers = (Vector*)INCREF(analyzers);
    }
    else if (language) {
        ivars->analyzers = Vec_new(3);
        Vec_Push(ivars->analyzers, (Obj*)CaseFolder_new());
        Vec_Push(ivars->analyzers, (Obj*)RegexTokenizer_new(NULL));
        Vec_Push(ivars->analyzers, (Obj*)SnowStemmer_new(language));
    }
    else {
        THROW(ERR, "Must specify either 'language' or 'analyzers'");
    }

    return self;
}