CaseFolder*
CaseFolder_init(CaseFolder *self) {
    // Chain up to the parent class initializer first.
    Analyzer_init((Analyzer*)self);

    // Case folding is implemented via a Normalizer with default form,
    // case folding on, and accent stripping off.
    CaseFolderIVARS *const ivars = CaseFolder_IVARS(self);
    ivars->normalizer = Normalizer_new(NULL, true, false);

    return self;
}
EasyAnalyzer*
EasyAnalyzer_init(EasyAnalyzer *self, const CharBuf *language) {
    // Chain up to the parent class initializer first.
    Analyzer_init((Analyzer*)self);

    EasyAnalyzerIVARS *const ivars = EasyAnalyzer_IVARS(self);

    // Keep our own copy of the language tag, then assemble the standard
    // three-stage pipeline: tokenize, normalize, stem.
    ivars->language   = CB_Clone(language);
    ivars->tokenizer  = StandardTokenizer_new();
    ivars->normalizer = Normalizer_new(NULL, true, false);
    ivars->stemmer    = SnowStemmer_new(language);

    return self;
}
SnowballStemmer*
SnowStemmer_init(SnowballStemmer *self, String *language) {
    Analyzer_init((Analyzer*)self);
    SnowballStemmerIVARS *const ivars = SnowStemmer_IVARS(self);
    ivars->language = Str_Clone(language);

    // Get a Snowball stemmer.  Snowball identifies algorithms by
    // two-letter ISO 639-1 codes; be case-insensitive about the input.
    //
    // Str_Code_Point_At() returns a full Unicode code point, but the
    // <ctype.h> functions have undefined behavior for arguments outside
    // the range of unsigned char (CERT STR37-C) -- cast before calling
    // tolower().  A truncated non-ASCII code point simply won't match
    // any stemmer, so the THROW below still fires as before.
    char lang_buf[3];
    lang_buf[0] = (char)tolower((unsigned char)Str_Code_Point_At(language, 0));
    lang_buf[1] = (char)tolower((unsigned char)Str_Code_Point_At(language, 1));
    lang_buf[2] = '\0';
    ivars->snowstemmer = sb_stemmer_new(lang_buf, "UTF_8");
    if (!ivars->snowstemmer) {
        THROW(ERR, "Can't find a Snowball stemmer for %o", language);
    }
    return self;
}
Stopalizer*
Stopalizer_init(Stopalizer *self, const CharBuf *language, Hash *stoplist) {
    // Chain up to the parent class initializer first.
    Analyzer_init((Analyzer*)self);

    // An explicit stoplist takes the place of a language; supplying both
    // is an error, supplying neither is also an error.
    if (stoplist) {
        if (language) {
            THROW(ERR, "Can't have both stoplist and language");
        }
        self->stoplist = (Hash*)INCREF(stoplist);
        return self;
    }

    if (!language) {
        THROW(ERR, "Either stoplist or language is required");
    }

    // Derive a stoplist from the language tag.
    self->stoplist = Stopalizer_gen_stoplist(language);
    if (!self->stoplist) {
        THROW(ERR, "Can't get a stoplist for '%o'", language);
    }
    return self;
}
SnowballStopFilter*
SnowStop_init(SnowballStopFilter *self, String *language, Hash *stoplist) {
    // Chain up to the parent class initializer first.
    Analyzer_init((Analyzer*)self);
    SnowballStopFilterIVARS *const ivars = SnowStop_IVARS(self);

    // Exactly one of `stoplist` / `language` must be supplied: an
    // explicit stoplist wins, a language tag generates one, and
    // providing both (or neither) is an error.
    if (stoplist) {
        if (language) {
            THROW(ERR, "Can't have both stoplist and language");
        }
        ivars->stoplist = (Hash*)INCREF(stoplist);
        return self;
    }

    if (!language) {
        THROW(ERR, "Either stoplist or language is required");
    }

    ivars->stoplist = SnowStop_gen_stoplist(language);
    if (!ivars->stoplist) {
        THROW(ERR, "Can't get a stoplist for '%o'", language);
    }
    return self;
}
PolyAnalyzer*
PolyAnalyzer_init(PolyAnalyzer *self, const CharBuf *language,
                  VArray *analyzers) {
    // Chain up to the parent class initializer first.
    Analyzer_init((Analyzer*)self);

    if (analyzers) {
        // Caller supplied the chain; verify every element is an Analyzer
        // before taking a reference.
        const uint32_t num_analyzers = VA_Get_Size(analyzers);
        for (uint32_t i = 0; i < num_analyzers; i++) {
            CERTIFY(VA_Fetch(analyzers, i), ANALYZER);
        }
        self->analyzers = (VArray*)INCREF(analyzers);
    }
    else if (language) {
        // Build the default chain: case folding, tokenizing, stemming.
        VArray *chain = VA_new(3);
        VA_Push(chain, (Obj*)CaseFolder_new());
        VA_Push(chain, (Obj*)RegexTokenizer_new(NULL));
        VA_Push(chain, (Obj*)SnowStemmer_new(language));
        self->analyzers = chain;
    }
    else {
        THROW(ERR, "Must specify either 'language' or 'analyzers'");
    }

    return self;
}
PolyAnalyzer*
PolyAnalyzer_init(PolyAnalyzer *self, String *language, Vector *analyzers) {
    // Chain up to the parent class initializer first.
    Analyzer_init((Analyzer*)self);
    PolyAnalyzerIVARS *const ivars = PolyAnalyzer_IVARS(self);

    if (analyzers) {
        // Caller supplied the chain; verify every element is an Analyzer
        // before taking a reference.
        const uint32_t num_analyzers = Vec_Get_Size(analyzers);
        for (uint32_t i = 0; i < num_analyzers; i++) {
            CERTIFY(Vec_Fetch(analyzers, i), ANALYZER);
        }
        ivars->analyzers = (Vector*)INCREF(analyzers);
    }
    else if (language) {
        // Build the default chain: case folding, tokenizing, stemming.
        Vector *chain = Vec_new(3);
        Vec_Push(chain, (Obj*)CaseFolder_new());
        Vec_Push(chain, (Obj*)RegexTokenizer_new(NULL));
        Vec_Push(chain, (Obj*)SnowStemmer_new(language));
        ivars->analyzers = chain;
    }
    else {
        THROW(ERR, "Must specify either 'language' or 'analyzers'");
    }

    return self;
}
WhitespaceTokenizer*
WhitespaceTokenizer_init(WhitespaceTokenizer *self) {
    // No instance state of our own -- just chain up to the parent
    // class initializer.
    Analyzer_init((Analyzer*)self);
    return self;
}
StandardTokenizer*
StandardTokenizer_init(StandardTokenizer *self) {
    // No instance state of our own -- just chain up to the parent
    // class initializer.
    Analyzer_init((Analyzer*)self);
    return self;
}