/**
 * Constructs an analyzer whose stop set is populated from a word-list file.
 *
 * A fresh CLTCSetList is allocated for the stop set and maxTokenLength is
 * set to DEFAULT_MAX_TOKEN_LENGTH before the list is loaded.
 *
 * @param stopwordsFile file handed unchanged to WordlistLoader::getWordSet
 * @param enc           encoding name given to the loader; when NULL,
 *                      "ASCII" is used instead
 */
StandardAnalyzer::StandardAnalyzer(const char* stopwordsFile, const char* enc)
    : stopSet(_CLNEW CLTCSetList(true)),
      maxTokenLength(DEFAULT_MAX_TOKEN_LENGTH)
{
    // Fall back to ASCII when the caller did not name an encoding.
    const char* effectiveEncoding = (enc != NULL) ? enc : "ASCII";
    WordlistLoader::getWordSet(stopwordsFile, effectiveEncoding, stopSet);
}
/**
 * Constructs an analyzer that uses the caller-supplied stop words.
 *
 * A fresh CLTCSetList is allocated for the stop set, maxTokenLength is set
 * to DEFAULT_MAX_TOKEN_LENGTH, and the set is filled through
 * StopFilter::fillStopTable.
 *
 * @param stopWords array of words forwarded to StopFilter::fillStopTable
 *                  (NOTE(review): presumably NULL-terminated -- confirm
 *                  against fillStopTable)
 */
StandardAnalyzer::StandardAnalyzer(const TCHAR** stopWords)
    : stopSet(_CLNEW CLTCSetList(true)),
      maxTokenLength(DEFAULT_MAX_TOKEN_LENGTH)
{
    StopFilter::fillStopTable(stopSet, stopWords);
}
// This line holds, in order:
//   1. StandardAnalyzer(const TCHAR** stopWords): allocates the stop set, sets
//      maxTokenLength to DEFAULT_MAX_TOKEN_LENGTH and fills the set via
//      StopFilter::fillStopTable.
//   2. StandardAnalyzer(const char* stopwordsFile, const char* enc): same
//      initialisation, then loads the stop set with WordlistLoader::getWordSet;
//      a NULL enc is replaced by "ASCII".
//   3. StandardAnalyzer(Reader* stopwordsReader, const bool _bDeleteReader):
//      same initialisation, then forwards the reader, the stop set and
//      _bDeleteReader to WordlistLoader::getWordSet (NOTE(review): the flag
//      presumably tells the loader whether to delete the reader -- confirm).
//   4. The opening of nested class SavedStreams (derived from TokenStream): two
//      public pointers initialised to NULL by its constructor, an empty close(),
//      and a next() that always returns NULL.
// NOTE(review): the class body is not closed on this line, and constructors 1
// and 2 repeat signatures that are also defined earlier in this text -- confirm
// whether this is a snippet collection rather than a single translation unit.
StandardAnalyzer::StandardAnalyzer( const TCHAR** stopWords): stopSet(_CLNEW CLTCSetList(true)), maxTokenLength(DEFAULT_MAX_TOKEN_LENGTH) { StopFilter::fillStopTable( stopSet,stopWords ); } StandardAnalyzer::StandardAnalyzer(const char* stopwordsFile, const char* enc): stopSet(_CLNEW CLTCSetList(true)), maxTokenLength(DEFAULT_MAX_TOKEN_LENGTH) { if ( enc == NULL ) enc = "ASCII"; WordlistLoader::getWordSet(stopwordsFile, enc, stopSet); } StandardAnalyzer::StandardAnalyzer(CL_NS(util)::Reader* stopwordsReader, const bool _bDeleteReader): stopSet(_CLNEW CLTCSetList(true)), maxTokenLength(DEFAULT_MAX_TOKEN_LENGTH) { WordlistLoader::getWordSet(stopwordsReader, stopSet, _bDeleteReader); } class StandardAnalyzer::SavedStreams : public TokenStream { public: StandardTokenizer* tokenStream; TokenStream* filteredTokenStream; SavedStreams():tokenStream(NULL), filteredTokenStream(NULL) { } void close(){} Token* next(Token* token) {return NULL;}
/**
 * Constructs an analyzer with the built-in English stop words.
 *
 * A fresh CLTCSetList is allocated for the stop set, maxTokenLength is set
 * to DEFAULT_MAX_TOKEN_LENGTH, and the set is filled from
 * StopAnalyzer::ENGLISH_STOP_WORDS.
 */
StandardAnalyzer::StandardAnalyzer()
    : stopSet(_CLNEW CLTCSetList(true)),
      maxTokenLength(DEFAULT_MAX_TOKEN_LENGTH)
{
    StopFilter::fillStopTable(stopSet,
                              CL_NS(analysis)::StopAnalyzer::ENGLISH_STOP_WORDS);
}
/**
 * Creates the analyzer for the named language using the supplied stop words.
 *
 * @param language  language name; a copy made with STRDUP_TtoT is stored on
 *                  the analyzer
 * @param stopWords array of words forwarded to StopFilter::fillStopTable
 *                  (NOTE(review): presumably NULL-terminated -- confirm
 *                  against fillStopTable)
 */
BRSnowballAnalyzer::BRSnowballAnalyzer(const TCHAR* language, const TCHAR** stopWords)
{
    // The parameter shadows the member of the same name, hence the
    // explicit this-> on the left-hand side.
    this->language = STRDUP_TtoT(language);

    // Allocate the stop set, then populate it from the caller's word list.
    stopSet = _CLNEW CLTCSetList(true);
    StopFilter::fillStopTable(stopSet, stopWords);
}