TokenStreamPtr WhitespaceAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) {
    // Reuse the tokenizer cached on this analyzer's per-thread state when present;
    // otherwise build a whitespace tokenizer and cache it for subsequent calls.
    TokenizerPtr cached(boost::dynamic_pointer_cast<Tokenizer>(getPreviousTokenStream()));
    if (cached) {
        // Same tokenizer instance — just point it at the new reader.
        cached->reset(reader);
        return cached;
    }
    cached = newLucene<WhitespaceTokenizer>(reader);
    setPreviousTokenStream(cached);
    return cached;
}
TokenStreamPtr ChineseAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) {
    // Look up the tokenizer/filter pair previously cached for reuse.
    ChineseAnalyzerSavedStreamsPtr saved(boost::dynamic_pointer_cast<ChineseAnalyzerSavedStreams>(getPreviousTokenStream()));
    if (saved) {
        // Chain already built — re-aim the source tokenizer at the new reader.
        saved->source->reset(reader);
    } else {
        // First use on this thread: build tokenizer -> filter and remember the pair.
        saved = newLucene<ChineseAnalyzerSavedStreams>();
        saved->source = newLucene<ChineseTokenizer>(reader);
        saved->result = newLucene<ChineseFilter>(saved->source);
        setPreviousTokenStream(saved);
    }
    return saved->result;
}
TokenStreamPtr RussianAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) {
    // Reuse the cached stream chain if one was saved on this analyzer's thread state.
    RussianAnalyzerSavedStreamsPtr saved(boost::dynamic_pointer_cast<RussianAnalyzerSavedStreams>(getPreviousTokenStream()));
    if (saved) {
        // Existing chain — just reset the source tokenizer onto the new reader.
        saved->source->reset(reader);
        return saved->result;
    }
    // First use: build letter-tokenize -> lowercase -> stop -> stem, then cache it.
    saved = newLucene<RussianAnalyzerSavedStreams>();
    saved->source = newLucene<RussianLetterTokenizer>(reader);
    saved->result = newLucene<LowerCaseFilter>(saved->source);
    saved->result = newLucene<StopFilter>(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), saved->result, stopSet);
    saved->result = newLucene<RussianStemFilter>(saved->result);
    setPreviousTokenStream(saved);
    return saved->result;
}
TokenStreamPtr DutchAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) {
    // Pull the cached stream chain, if any, from the analyzer's saved state.
    DutchAnalyzerSavedStreamsPtr saved(boost::dynamic_pointer_cast<DutchAnalyzerSavedStreams>(getPreviousTokenStream()));
    if (saved) {
        // Chain exists — reattach the tokenizer to the incoming reader.
        saved->source->reset(reader);
        return saved->result;
    }
    // First use: standard-tokenize -> standard filter -> stop -> Dutch stem (with exclusions).
    saved = newLucene<DutchAnalyzerSavedStreams>();
    saved->source = newLucene<StandardTokenizer>(matchVersion, reader);
    saved->result = newLucene<StandardFilter>(saved->source);
    saved->result = newLucene<StopFilter>(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), saved->result, stoptable);
    saved->result = newLucene<DutchStemFilter>(saved->result, excltable);
    setPreviousTokenStream(saved);
    return saved->result;
}
TokenStreamPtr StandardAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) {
    // Recover the cached tokenizer/filter chain for this analyzer, if one exists.
    StandardAnalyzerSavedStreamsPtr saved = boost::dynamic_pointer_cast<StandardAnalyzerSavedStreams>(getPreviousTokenStream());
    if (saved) {
        // Chain already built — just retarget the tokenizer at the new reader.
        saved->tokenStream->reset(reader);
    } else {
        // First use: cache the holder, then build tokenize -> standard -> lowercase -> stop.
        saved = newLucene<StandardAnalyzerSavedStreams>();
        setPreviousTokenStream(saved);
        saved->tokenStream = newLucene<StandardTokenizer>(matchVersion, reader);
        saved->filteredTokenStream = newLucene<StandardFilter>(saved->tokenStream);
        saved->filteredTokenStream = newLucene<LowerCaseFilter>(saved->filteredTokenStream);
        saved->filteredTokenStream = newLucene<StopFilter>(enableStopPositionIncrements, saved->filteredTokenStream, stopSet);
    }
    // These settings may have changed between calls, so re-apply them on every use.
    saved->tokenStream->setMaxTokenLength(maxTokenLength);
    saved->tokenStream->setReplaceInvalidAcronym(replaceInvalidAcronym);
    return saved->filteredTokenStream;
}
void DutchAnalyzer::setStemExclusionTable(HashSet<String> exclusions) {
    // Replace the exclusion set and drop the cached stream chain so the next
    // reusableTokenStream() call builds a stemmer that honours the new table.
    excltable = exclusions;
    setPreviousTokenStream(LuceneObjectPtr());
}