Beispiel #1
0
CaseFolder*
CaseFolder_init(CaseFolder *self) {
    Analyzer_init((Analyzer*)self);
    CaseFolderIVARS *const ivars = CaseFolder_IVARS(self);
    ivars->normalizer = Normalizer_new(NULL, true, false);
    return self;
}
Beispiel #2
0
EasyAnalyzer*
EasyAnalyzer_init(EasyAnalyzer *self, const CharBuf *language) {
    Analyzer_init((Analyzer*)self);
    EasyAnalyzerIVARS *const ivars = EasyAnalyzer_IVARS(self);
    ivars->language   = CB_Clone(language);
    ivars->tokenizer  = StandardTokenizer_new();
    ivars->normalizer = Normalizer_new(NULL, true, false);
    ivars->stemmer    = SnowStemmer_new(language);
    return self;
}
SnowballStemmer*
SnowStemmer_init(SnowballStemmer *self, String *language) {
    char lang_buf[3];
    Analyzer_init((Analyzer*)self);
    SnowballStemmerIVARS *const ivars = SnowStemmer_IVARS(self);
    ivars->language = Str_Clone(language);

    // Get a Snowball stemmer.  Be case-insensitive.
    lang_buf[0] = tolower(Str_Code_Point_At(language, 0));
    lang_buf[1] = tolower(Str_Code_Point_At(language, 1));
    lang_buf[2] = '\0';
    ivars->snowstemmer = sb_stemmer_new(lang_buf, "UTF_8");
    if (!ivars->snowstemmer) {
        THROW(ERR, "Can't find a Snowball stemmer for %o", language);
    }

    return self;
}
Beispiel #4
0
Stopalizer*
Stopalizer_init(Stopalizer *self, const CharBuf *language, Hash *stoplist)
{
    Analyzer_init((Analyzer*)self);

    if (stoplist) {
        if (language) { THROW(ERR, "Can't have both stoplist and language"); }
        self->stoplist = (Hash*)INCREF(stoplist);
    }
    else if (language) {
        self->stoplist = Stopalizer_gen_stoplist(language);
        if (!self->stoplist)
            THROW(ERR, "Can't get a stoplist for '%o'", language);
    }
    else {
        THROW(ERR, "Either stoplist or language is required");
    }

    return self;
}
Beispiel #5
0
SnowballStopFilter*
SnowStop_init(SnowballStopFilter *self, String *language,
              Hash *stoplist) {
    Analyzer_init((Analyzer*)self);
    SnowballStopFilterIVARS *const ivars = SnowStop_IVARS(self);

    if (stoplist) {
        if (language) { THROW(ERR, "Can't have both stoplist and language"); }
        ivars->stoplist = (Hash*)INCREF(stoplist);
    }
    else if (language) {
        ivars->stoplist = SnowStop_gen_stoplist(language);
        if (!ivars->stoplist) {
            THROW(ERR, "Can't get a stoplist for '%o'", language);
        }
    }
    else {
        THROW(ERR, "Either stoplist or language is required");
    }

    return self;
}
Beispiel #6
0
PolyAnalyzer*
PolyAnalyzer_init(PolyAnalyzer *self, const CharBuf *language,
                  VArray *analyzers) {
    Analyzer_init((Analyzer*)self);
    if (analyzers) {
        for (uint32_t i = 0, max = VA_Get_Size(analyzers); i < max; i++) {
            CERTIFY(VA_Fetch(analyzers, i), ANALYZER);
        }
        self->analyzers = (VArray*)INCREF(analyzers);
    }
    else if (language) {
        self->analyzers = VA_new(3);
        VA_Push(self->analyzers, (Obj*)CaseFolder_new());
        VA_Push(self->analyzers, (Obj*)RegexTokenizer_new(NULL));
        VA_Push(self->analyzers, (Obj*)SnowStemmer_new(language));
    }
    else {
        THROW(ERR, "Must specify either 'language' or 'analyzers'");
    }

    return self;
}
Beispiel #7
0
PolyAnalyzer*
PolyAnalyzer_init(PolyAnalyzer *self, String *language,
                  Vector *analyzers) {
    Analyzer_init((Analyzer*)self);
    PolyAnalyzerIVARS *const ivars = PolyAnalyzer_IVARS(self);

    if (analyzers) {
        for (uint32_t i = 0, max = Vec_Get_Size(analyzers); i < max; i++) {
            CERTIFY(Vec_Fetch(analyzers, i), ANALYZER);
        }
        ivars->analyzers = (Vector*)INCREF(analyzers);
    }
    else if (language) {
        ivars->analyzers = Vec_new(3);
        Vec_Push(ivars->analyzers, (Obj*)CaseFolder_new());
        Vec_Push(ivars->analyzers, (Obj*)RegexTokenizer_new(NULL));
        Vec_Push(ivars->analyzers, (Obj*)SnowStemmer_new(language));
    }
    else {
        THROW(ERR, "Must specify either 'language' or 'analyzers'");
    }

    return self;
}
WhitespaceTokenizer*
WhitespaceTokenizer_init(WhitespaceTokenizer *self) {
    Analyzer_init((Analyzer*)self);
    return self;
}
Beispiel #9
0
StandardTokenizer*
StandardTokenizer_init(StandardTokenizer *self) {
    Analyzer_init((Analyzer*)self);
    return self;
}